利用 TensorFlow 识别 MNIST 数据集

作者: 拓季 | 来源:发表于2018-02-08 12:19 被阅读0次

    刚刚接触 TensorFlow 一段时间,觉得了解得还太少,因此一直不愿意动笔写关于其应用的笔记。学习代码的一个很好的方法是对于同一个任务,采用多种不同的方法来实现,这样也可以更加直观地比较不同实现方式的优劣。这里列出几个利用 TensorFlow 搭建不同类型的神经网络来实现 MNIST 手写数字识别的代码,以便更加方便地对比不同网络的构建形式。为了便于理解,做了很多的注释,放在这里以备自己查看使用,代码版权归属于相应的作者。

    对于图像数据首先想到的就是利用 CNN 进行处理,这里首先列出从头开始创建 CNN 的实现方式:

    # this cell's code is adopted from Udacity
    import tensorflow as tf
    from tensorflow.examples.tutorials.mnist import input_data
    # Load MNIST from ./mnist with one-hot labels.  reshape=False is passed so
    # the images can be fed as-is to the [None, 28, 28, 1] input placeholder.
    mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)

    # ---- training hyper-parameters ----
    learning_rate = 0.00001
    epochs = 10
    batch_size = 128
    # how many validation / test samples each accuracy estimate is computed on
    test_valid_size = 256

    # ---- network hyper-parameters ----
    n_classes = 10
    # value fed to the keep_prob placeholder during training
    dropout = 0.75


    def _normal_variable(shape):
        """Return a tf.Variable initialised from tf.random_normal(shape)."""
        return tf.Variable(tf.random_normal(shape))


    # Filter weights are shaped (height, width, input_depth, output_depth);
    # the two dense layers are shaped (inputs, outputs).
    weights = {
        'wc1': _normal_variable([5, 5, 1, 32]),
        'wc2': _normal_variable([5, 5, 32, 64]),
        'wd1': _normal_variable([7*7*64, 1024]),
        'out': _normal_variable([1024, n_classes]),
    }
    # One bias per output channel / unit, i.e. shape (output_depth,).
    biases = {
        'bc1': _normal_variable([32]),
        'bc2': _normal_variable([64]),
        'bd1': _normal_variable([1024]),
        'out': _normal_variable([n_classes]),
    }
    # tf.nn.conv2d expects a 4-D input laid out as (batch, height, width, depth)
    # and one stride per input dimension, in that same order.  The batch and
    # depth strides are kept at 1 so that no sample and no channel is skipped;
    # if some of them should be left out, remove them from the data set rather
    # than striding over them.
    def conv2d(x, W, b, strides=1):
        """Convolve `x` with filter `W`, add bias `b`, then apply ReLU.

        `strides` is the step used along both the height and the width axis;
        padding is 'SAME'.
        """
        window_step = [1, strides, strides, 1]
        convolved = tf.nn.conv2d(x, W, strides=window_step, padding='SAME')
        # bias_add is used to add the 1-D bias `b` to the 4-D feature map
        biased = tf.nn.bias_add(convolved, b)
        return tf.nn.relu(biased)
    def maxpool2d(x, k=2):
        """Max-pool `x` with a k-by-k window moved k steps at a time ('SAME' padding)."""
        # the same [1, k, k, 1] layout serves as both window size and stride
        window = [1, k, k, 1]
        return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
    def conv_net(x, weights, biases, dropout):
        """Assemble the CNN graph and return the un-normalised class scores.

        `weights` is indexed with the keys 'wc1', 'wc2', 'wd1', 'out' and
        `biases` with 'bc1', 'bc2', 'bd1', 'out'.  `dropout` is handed to
        tf.nn.dropout as its second argument (the keep probability).
        The shape notes below assume the 28*28*1 input used in this script.
        """
        # conv block 1: 28*28*1 -> 14*14*32
        pooled1 = maxpool2d(conv2d(x, weights['wc1'], biases['bc1']), k=2)
        # conv block 2: 14*14*32 -> 7*7*64
        pooled2 = maxpool2d(conv2d(pooled1, weights['wc2'], biases['bc2']), k=2)
        # flatten: the row length is read from the first dimension of 'wd1'
        # (get_shape().as_list() gives the static shape as a Python list)
        flat_size = weights['wd1'].get_shape().as_list()[0]
        flat = tf.reshape(pooled2, [-1, flat_size])
        # fully connected layer followed by ReLU and dropout
        hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
        hidden = tf.nn.dropout(hidden, dropout)
        # output layer: linear, no activation
        return tf.add(tf.matmul(hidden, weights['out']), biases['out'])
    # graph inputs: images, one-hot labels and the dropout keep probability
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y = tf.placeholder(tf.float32, [None, n_classes])
    keep_prob = tf.placeholder(tf.float32)
    # model
    logits = conv_net(x, weights, biases, keep_prob)
    # define loss and optimizer
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    # `optimizer` has to be the training op returned by minimize(); the bare
    # GradientDescentOptimizer object cannot be passed to sess.run()
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)
    # accuracy: fraction of samples whose highest-scoring class matches the label
    correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # op that initializes all variables; it must be run inside the session
    init = tf.global_variables_initializer()
    # launch the graph
    with tf.Session() as sess:
        # variables must be initialized before any op that reads them is run
        sess.run(init)
        for epoch in range(epochs):
            for batch in range(mnist.train.num_examples//batch_size):
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                # one training step, with dropout active
                sess.run(optimizer, feed_dict={
                    x: batch_x,
                    y: batch_y,
                    keep_prob: dropout})
                # calculate batch loss and accuracy (keep_prob 1. = no dropout)
                loss = sess.run(cost, feed_dict={
                    x: batch_x,
                    y: batch_y,
                    keep_prob: 1.})
                valid_acc = sess.run(accuracy, feed_dict={
                    x: mnist.validation.images[:test_valid_size],
                    y: mnist.validation.labels[:test_valid_size],
                    keep_prob: 1.})
                print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                      epoch + 1,
                      batch + 1,
                      loss,
                      valid_acc))
            # accuracy on the first test_valid_size test samples, once per epoch
            test_acc = sess.run(accuracy, feed_dict={
                x: mnist.test.images[:test_valid_size],
                y: mnist.test.labels[:test_valid_size],
                keep_prob: 1.})
            print('Testing Accuracy: {}'.format(test_acc))

    TensorFlow 还提供了一个更加方便的 tf.layers API,利用它来构建同样架构的 CNN 的代码如下:

    import tensorflow as tf
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("./mnist", one_hot=True, reshape=False)
    # parameters
    learning_rate = 0.001
    epochs = 10
    batch_size = 128
    # number of samples to calculate validation and accuracy
    test_valid_size = 256
    # network parameters
    n_classes = 10
    # `dropout` is fed 0.75 while training and 1. while evaluating, i.e. it is
    # the probability of KEEPING a unit
    dropout = tf.placeholder(tf.float32)
    # Input and target placeholders (targets are one-hot rows of n_classes)
    inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1))
    targets_ = tf.placeholder(tf.float32, (None, n_classes))
    # build the conv2d graph with tf.layers.conv2d and tf.layers.max_pooling2d
    # layer 1 - 28*28*1 to 14*14*32
    conv1 = tf.layers.conv2d(inputs_, 32, (5, 5), padding='same', activation=tf.nn.relu)
    maxpool1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2))
    # layer 2 - 14*14*32 to 7*7*64
    conv2 = tf.layers.conv2d(maxpool1, 64, (5, 5), padding='same', activation=tf.nn.relu)
    maxpool2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2))
    # Fully connected layer
    flattened = tf.reshape(maxpool2, [-1, 7*7*64])
    fc1 = tf.layers.dense(flattened, units=1024, activation=tf.nn.relu)
    # tf.layers.dropout(rate=...) expects the fraction to DROP and does nothing
    # unless training=True, so the keep probability fed through `dropout`
    # would be misread and never applied.  tf.nn.dropout takes the keep
    # probability directly, which matches the values fed by the training loop.
    fc1 = tf.nn.dropout(fc1, keep_prob=dropout)
    # output logits
    logits = tf.layers.dense(fc1, units=n_classes)
    # define loss and optimizer
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets_))
    # `optimizer` has to be the training op returned by minimize(); the bare
    # GradientDescentOptimizer object cannot be passed to sess.run()
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)
    # accuracy: fraction of samples whose highest-scoring class matches the label
    correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(targets_, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # op that initializes all variables; it must be run inside the session
    init = tf.global_variables_initializer()
    # launch the graph
    with tf.Session() as sess:
        # variables must be initialized before any op that reads them is run
        sess.run(init)
        for epoch in range(epochs):
            for batch in range(mnist.train.num_examples//batch_size):
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                # one training step
                sess.run(optimizer, feed_dict={
                    inputs_: batch_x,
                    targets_: batch_y,
                    dropout: 0.75})
                # calculate batch loss and accuracy
                loss = sess.run(cost, feed_dict={
                    inputs_: batch_x,
                    targets_: batch_y,
                    dropout: 1.})
                valid_acc = sess.run(accuracy, feed_dict={
                    inputs_: mnist.validation.images[:test_valid_size],
                    targets_: mnist.validation.labels[:test_valid_size],
                    dropout: 1.})
                print('Epoch {:>2}, Batch {:>3} - loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                      epoch + 1,
                      batch + 1,
                      loss,
                      valid_acc))
            # accuracy on the first test_valid_size test samples, once per epoch
            test_acc = sess.run(accuracy, feed_dict={
                inputs_: mnist.test.images[:test_valid_size],
                targets_: mnist.test.labels[:test_valid_size],
                dropout: 1.})
            print('Testing Accuracy: {}'.format(test_acc))

    为了便于对比,在此给出利用 TensorFlow 搭建一个标准的多层神经网络(Standard Neural Network,SNN)来完成同样识别任务的代码,这个 SNN 的最终识别率为 82%。如果可以无障碍地阅读这两段代码,那么对于 TensorFlow 的基本使用也就算是清楚了,代码版权依然归属于 Udacity。

    from tensorflow.examples.tutorials.mnist import input_data
    import tensorflow as tf
    mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)
    # Parameters
    learning_rate = 0.001
    training_epochs = 20
    batch_size = 128
    display_step = 1
    n_input = 784  # MNIST data input (img shape: 28*28)
    n_classes = 10  # MNIST total classes (0-9 digits)
    n_hidden_layer = 256  # layer number of features
    # Store layers weight & bias
    weights = {
        'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
        'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes])),
    }
    biases = {
        'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
        'out': tf.Variable(tf.random_normal([n_classes])),
    }
    # tf Graph input
    x = tf.placeholder("float", [None, 28, 28, 1])
    y = tf.placeholder("float", [None, n_classes])
    # flatten every 28*28*1 image into a row of n_input values
    x_flat = tf.reshape(x, [-1, n_input])
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer'])
    layer_1 = tf.nn.relu(layer_1)
    # Output layer with linear activation
    logits = tf.matmul(layer_1, weights['out']) + biases['out']
    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
    # Op that initializes all variables; it must be run inside the session
    init = tf.global_variables_initializer()
    # Launch the graph
    with tf.Session() as sess:
        # Variables must be initialized before any op that reads them is run
        sess.run(init)
        # Number of full batches per epoch; constant, so computed once
        total_batch = mnist.train.num_examples // batch_size
        # Training cycle
        for epoch in range(training_epochs):
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                # Run optimization op (backprop)
                sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # Display logs per epoch step, using the cost of the last batch
            if epoch % display_step == 0:
                c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
                print("Epoch:", '%04d' % (epoch+1), "cost=",
                      "{:.9f}".format(c))
        print("Optimization Finished!")
        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # Decrease test_size if you don't have enough memory
        test_size = 256
        print("Accuracy:", accuracy.eval({x: mnist.test.images[:test_size], y: mnist.test.labels[:test_size]}))

    除了使用 CNN 之外,利用 RNN 也同样可以识别 MNIST 数据集,相应的代码如下:

    # TensorFlow for RNN
    # this cell's code is adopted from 
    # https://jasdeep06.github.io/posts/Understanding-LSTM-in-Tensorflow-MNIST/
    import tensorflow as tf
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("./mnist", one_hot=True)

    # ---- constants ----
    # every 28 x 28 image is unrolled into 28 time steps ...
    time_steps = 28
    # number of hidden units in the LSTM cell
    num_units = 128
    # ... and each time step is one image row of 28 pixels
    input_size = 28
    # learning rate handed to the Adam optimizer
    learning_rate = 0.001
    # number of classes in the labels
    n_classes = 10
    # samples per training batch
    batch_size = 128

    # ---- output layer parameters ----
    out_weights = tf.Variable(tf.random_normal([num_units, n_classes]))
    out_bias = tf.Variable(tf.random_normal([n_classes]))

    # ---- graph inputs ----
    x = tf.placeholder(tf.float32, [None, time_steps, input_size])
    y = tf.placeholder(tf.float32, [None, n_classes])

    # split [batch_size, time_steps, input_size] along axis 1 into a list of
    # `time_steps` tensors shaped [batch_size, input_size]
    inputs = tf.unstack(x, num=time_steps, axis=1)

    # ---- recurrent network ----
    lstm_layer = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1)
    outputs, _ = tf.contrib.rnn.static_rnn(lstm_layer, inputs, dtype=tf.float32)

    # project the output of the last time step, [batch_size, num_units], onto
    # [batch_size, n_classes] with a matrix multiplication
    last_output = outputs[-1]
    prediction = tf.matmul(last_output, out_weights) + out_bias

    # ---- loss and training op ----
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
    loss = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    # ---- evaluation ----
    predicted_class = tf.argmax(prediction, 1)
    true_class = tf.argmax(y, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # op that initializes all variables (run inside the training session)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        # variables must be initialized before any op that reads them is run
        sess.run(init)
        # steps 1..799, the same range the former `while iter < 800` covered;
        # named `step` so the builtin iter() is not shadowed
        for step in range(1, 800):
            batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
            # flat images -> [batch_size, time_steps, input_size]
            batch_x = batch_x.reshape((batch_size, time_steps, input_size))
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            if step % 10 == 0:
                # fetch both metrics for the current batch in one run
                acc, losses = sess.run([accuracy, loss],
                                       feed_dict={x: batch_x, y: batch_y})
                print("For iter ", step)
                # print the fetched value, not the `accuracy` tensor object
                print("Accuracy ", acc)
                print("Loss ", losses)
        # evaluate once, after training, on the first 128 test images
        test_data = mnist.test.images[:128].reshape((-1, time_steps, input_size))
        test_label = mnist.test.labels[:128]
        print("Test Accuracy ", sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

    按照代码中的参数设定,最终的识别率居然到了 96%,RNN 果然无所不能。


