美文网首页LSTMTensorFlow入门
RNN入门:多层LSTM网络(四)

RNN入门:多层LSTM网络(四)

作者: 调参写代码 | 来源:发表于2017-05-10 10:57 被阅读1065次

    上一篇介绍了如何编写单层的LSTM网络。对于一些复杂的序列,需要用到多层的网络进行学习。这里介绍如何利用TensorFlow(r1.1)编写多层LSTM网络。

    建立模型

    首先利用tf.contrib.rnn.MultiRNNCell将多个BasicLSTMCell单元汇总为一个。值得注意的是,每次添加一个单元需要重新调用一次BasicLSTMCell。因为该函数每次都会声明一次内部变量,如果不这么做则会reuse这些变量,从而产生错误。

    # Forward passes
    # Build a separate BasicLSTMCell object for every layer instead of
    # appending the same instance num_layers times.
    cells = []
    for n in range(num_layers):
        cells.append(tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple=True))
    # Wrap the per-layer cells into one multi-layer cell.
    cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
    

    为每一层的初始状态设置初始值。也可以采用.zero_state方法生成初始值,但是这样就不能对中间状态进行显式控制。具体根据实际应用选择。

    # Fed initial state for all layers: axis 0 selects the layer, axis 1 the
    # two tensors that make up one LSTMStateTuple.
    init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
    # Split along axis 0 so that we really get one tensor per layer
    # (tf.stack joins tensors; tf.unstack is the op that splits them).
    state_per_layer_list = tf.unstack(init_state, axis=0)
    # MultiRNNCell with state_is_tuple=True expects a tuple with one
    # LSTMStateTuple per layer.
    rnn_tuple_state = tuple(
        tf.contrib.rnn.LSTMStateTuple(layer_state[0], layer_state[1])
        for layer_state in state_per_layer_list
    )
    # Alternative without a fed state:
    # init_state = cell.zero_state(batch_size, tf.float32)
    

    损失函数

    基于最后一层网络的输出状态进行预测估计。

    # For every time step, combine both state tensors of the last layer
    # (state[-1][0] and state[-1][1]) into the class logits.
    logits_series = [
        tf.matmul(state[-1][0], W1) + tf.matmul(state[-1][1], W2) + b2
        for state in states_series
    ]
    # Turn each step's logits into class probabilities.
    predictions_series = []
    for logits in logits_series:
        predictions_series.append(tf.nn.softmax(logits))
    

    模型训练

    利用numpy计算训练中的初始值。

    # All-zero starting state with the same shape as the init_state placeholder.
    _current_state = np.zeros((num_layers, 2, batch_size, state_size))
    

    全部代码

    from __future__ import print_function, division
    import numpy as np
    import tensorflow as tf
    import matplotlib.pyplot as plt
    
    # Number of training passes; a fresh series is generated for each one.
    num_epochs = 100
    # Length of the generated series before it is reshaped into batch rows.
    total_series_length = 50000
    # Time steps per training window (second dimension of the placeholders).
    truncated_backprop_length = 15
    # Unit count passed to every BasicLSTMCell.
    state_size = 4
    # Number of output classes (the generated series is binary).
    num_classes = 2
    # The target series is the input series delayed by this many steps.
    echo_step = 3
    # Number of rows (parallel sub-series) in each batch.
    batch_size = 5
    # Number of complete windows that fit into one row of the reshaped series.
    num_batches = total_series_length//batch_size//truncated_backprop_length
    # Number of stacked LSTM layers.
    num_layers = 3
    
    def generateData():
        """Create a random binary series together with its delayed echo.

        Returns:
            A tuple ``(x, y)`` of arrays, each reshaped to ``batch_size`` rows.
            ``y`` is ``x`` shifted right by ``echo_step`` positions, with the
            first ``echo_step`` entries set to 0.
        """
        source = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))

        # np.roll wraps the tail around to the front; blank those wrapped values.
        echoed = np.roll(source, echo_step)
        echoed[:echo_step] = 0

        # Row-major reshape: the first index changes slowest, so every row is a
        # contiguous sub-series.
        return (source.reshape((batch_size, -1)), echoed.reshape((batch_size, -1)))
    
    # One training window of inputs and targets: a row per sub-series, a column
    # per time step.
    batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
    batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
    
    
    
    # Unpack columns
    # tf.split keeps axis 1, giving one [batch_size, 1] input tensor per step;
    # tf.unstack drops it, giving one [batch_size] label vector per step.
    inputs_series = tf.split(batchX_placeholder, truncated_backprop_length, axis=1)
    labels_series = tf.unstack(batchY_placeholder, axis=1)
    
    # Forward passes
    # Build a separate BasicLSTMCell object for every layer and wrap them into
    # one stacked cell.
    cells = [
        tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple=True)
        for _ in range(num_layers)
    ]
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)


    # Fed initial state for all layers: axis 0 selects the layer, axis 1 the
    # two tensors that make up one LSTMStateTuple.
    init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
    # Split along axis 0 so that we really get one tensor per layer
    # (tf.stack joins tensors; tf.unstack is the op that splits them).
    state_per_layer_list = tf.unstack(init_state, axis=0)
    rnn_tuple_state = tuple(
        tf.contrib.rnn.LSTMStateTuple(layer_state[0], layer_state[1])
        for layer_state in state_per_layer_list
    )
    # Alternative without a fed state:
    # init_state = stacked_lstm.zero_state(batch_size, tf.float32)

    # Unroll the stacked cell by hand so the full per-layer state of every
    # time step is kept in states_series.
    current_state = rnn_tuple_state
    states_series = []
    for current_input in inputs_series:
        with tf.variable_scope('rnn') as vs:
            try:
                output, current_state = stacked_lstm(current_input, current_state)
            except ValueError:
                # The cell weights already exist after the first step; switch
                # the scope to reuse mode and retry. Only ValueError is caught
                # so that interrupts and unrelated errors are not swallowed.
                vs.reuse_variables()
                output, current_state = stacked_lstm(current_input, current_state)
        states_series.append(current_state)
    
    # Output-layer parameters: W1 and W2 project the two state tensors of the
    # top layer onto the classes, b2 is the bias added to their sum.
    W1 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
    # NOTE: b1 is created but not referenced by the logits below.
    b1 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)
    W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
    b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

    # Per-step logits computed from the last layer's state (state[-1]).
    logits_series = [
        tf.matmul(state[-1][0], W1) + tf.matmul(state[-1][1], W2) + b2
        for state in states_series
    ]
    predictions_series = []
    for logits in logits_series:
        predictions_series.append(tf.nn.softmax(logits))

    # Cross-entropy per time step, averaged over all steps and batch rows.
    losses = []
    for logits, labels in zip(logits_series, labels_series):
        losses.append(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        )
    total_loss = tf.reduce_mean(losses)

    train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
    
    def plot(loss_list, predictions_series, batchX, batchY):
        """Redraw the live training figure.

        The first panel of a 2x3 grid shows the loss curve. The remaining five
        panels show, for batch rows 0-4, the input (blue), the target scaled by
        0.5 (red) and the thresholded prediction scaled by 0.3 (green).

        Args:
            loss_list: Loss values collected so far.
            predictions_series: Per-step predictions, indexed below as
                [step, batch row, class].
            batchX: Input window, indexed as [batch row, step].
            batchY: Target window, indexed as [batch row, step].
        """
        plt.subplot(2, 3, 1)
        plt.cla()
        plt.plot(loss_list)

        all_predictions = np.array(predictions_series)
        time_axis = range(truncated_backprop_length)

        for row_idx in range(5):
            class_probs = all_predictions[:, row_idx, :]
            # Emit 1 whenever the first class probability is below 0.5.
            predicted_bits = np.array([int(probs[0] < 0.5) for probs in class_probs])

            plt.subplot(2, 3, row_idx + 2)
            plt.cla()
            plt.axis([0, truncated_backprop_length, 0, 2])
            plt.bar(time_axis, batchX[row_idx, :], width=1, color="blue")
            plt.bar(time_axis, batchY[row_idx, :] * 0.5, width=1, color="red")
            plt.bar(time_axis, predicted_bits * 0.3, width=1, color="green")

        plt.draw()
        # Short pause so the GUI event loop can refresh the window.
        plt.pause(0.0001)
    
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Interactive figure that plot() refreshes during training.
        plt.ion()
        plt.figure()
        plt.show()
        loss_list = []

        # Everything evaluated per training step; fetching current_state lets
        # the final state of one window seed the next one.
        fetches = [total_loss, train_step, current_state, predictions_series]

        for epoch_idx in range(num_epochs):
            x,y = generateData()

            # Every epoch starts from an all-zero LSTM state.
            _current_state = np.zeros((num_layers, 2, batch_size, state_size))

            print("New data, epoch", epoch_idx)

            for batch_idx in range(num_batches):
                # Column range of the current truncated-backprop window.
                start_idx = batch_idx * truncated_backprop_length
                window = slice(start_idx, start_idx + truncated_backprop_length)

                batchX = x[:, window]
                batchY = y[:, window]

                feed = {
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state,
                }
                _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                    fetches, feed_dict=feed)

                loss_list.append(_total_loss)

                if batch_idx%100 == 0:
                    print("Step",batch_idx, "Batch loss", _total_loss)
                    plot(loss_list, _predictions_series, batchX, batchY)

    # Leave interactive mode and keep the final figure open.
    plt.ioff()
    plt.show()
    

    参考文献:

    相关文章

      网友评论

        本文标题:RNN入门:多层LSTM网络(四)

        本文链接:https://www.haomeiwen.com/subject/fviwtxtx.html