RNN入门：多层LSTM网络（四）

2023年10月9日 · 252次阅读 · 来源：深度学习入门

上一篇介绍了如何编写单层的LSTM网络。对于一些复杂的序列，需要用到多层的网络进行学习。这里介绍如何利用TensorFlow（r1.1）编写多层LSTM网络。

建立模型

首先利用tf.contrib.rnn.MultiRNNCell将多个BasicLSTMCell单元汇总为一个。值得注意的是，每次添加一个单元需要重新调用一次BasicLSTMCell。因为该函数每次都会声明一次内部变量，如果不这么做则会reuse这些变量，从而产生错误。

# Forward passes
# Build a separate BasicLSTMCell object for every layer, then wrap the list
# into a single stacked cell.
cells = [
    tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple=True)
    for _ in range(num_layers)
]
cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

为每一层的初始状态设置初始值。也可以采用.zero_state方法生成初始值，但是这样就不能对中间状态进行显式控制，具体可根据实际应用选择。

# Packed initial state fed from Python: axis 0 indexes the layer, axis 1
# selects the two state tensors (LSTMStateTuple's c and h) of that layer.
init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
# Split along the layer axis so that each entry holds one layer's state pair.
state_per_layer_list = tf.unstack(init_state, axis=0)
rnn_tuple_state = tuple(
    tf.contrib.rnn.LSTMStateTuple(layer_state[0], layer_state[1])
    for layer_state in state_per_layer_list
)
# Alternative without a feedable state:
# init_state = cell.zero_state(batch_size, tf.float32)

损失函数

基于最后一层网络的输出状态进行预测估计。

# One logits tensor per time step, computed from both tensors of the last
# layer's state tuple (state[-1]).
logits_series = [
    tf.matmul(state[-1][0], W1) + tf.matmul(state[-1][1], W2) + b2
    for state in states_series
]
# Turn every step's logits into class probabilities.
predictions_series = list(map(tf.nn.softmax, logits_series))

模型训练

利用numpy生成训练开始时的全零初始状态，其形状与init_state占位符一致。

# All-zero starting state with the [num_layers, 2, batch_size, state_size]
# layout that is fed into the init_state placeholder.
_current_state = np.zeros((num_layers, 2, batch_size, state_size))

全部代码

from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Hyper-parameters of the echo task and of the network.
num_epochs = 100  # number of times fresh data is generated and trained on
total_series_length = 50000  # length of the random series generated per epoch
truncated_backprop_length = 15  # time steps per training batch (unrolled graph length)
state_size = 4  # number of units passed to each BasicLSTMCell
num_classes = 2  # size of the output layer (the series is binary)
echo_step = 3  # the target is the input shifted right by this many steps
batch_size = 5  # number of rows the series is reshaped into
# Whole batches of truncated_backprop_length columns available in each row.
num_batches = total_series_length//batch_size//truncated_backprop_length
num_layers = 3  # number of stacked LSTM layers

def generateData():
    """Create one epoch of data for the echo task.

    Draws a uniformly random 0/1 series of ``total_series_length`` values and
    builds the target as the same series delayed by ``echo_step`` positions,
    with the first ``echo_step`` targets set to 0.

    Returns:
        A tuple ``(x, y)`` of arrays, each reshaped to ``batch_size`` rows.
    """
    bits = np.random.choice(2, total_series_length, p=[0.5, 0.5])

    # Shift right by echo_step; the values wrapped around to the front are zeroed.
    echoed = np.roll(bits, echo_step)
    echoed[:echo_step] = 0

    # Row-major reshape: each row is one contiguous sub-series.
    row_shape = (batch_size, -1)
    return (bits.reshape(row_shape), echoed.reshape(row_shape))

# One batch of inputs and of integer class labels, each declared as
# [batch_size, truncated_backprop_length].
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])



# Unpack columns
# Both placeholders are cut along axis 1 into one entry per time step.
# tf.split keeps the split axis, so each input slice is [batch_size, 1];
# tf.unstack removes it, so each label slice is [batch_size].
inputs_series = tf.split(batchX_placeholder, truncated_backprop_length, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward passes
# Build a separate BasicLSTMCell object for every layer, then wrap the list
# into a single stacked cell.
cells = [
    tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple=True)
    for _ in range(num_layers)
]
stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)


# Packed initial state fed from Python: axis 0 indexes the layer, axis 1
# selects the two state tensors (LSTMStateTuple's c and h) of that layer.
init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
# Split along the layer axis so that each entry holds one layer's state pair.
state_per_layer_list = tf.unstack(init_state, axis=0)
rnn_tuple_state = tuple(
    tf.contrib.rnn.LSTMStateTuple(layer_state[0], layer_state[1])
    for layer_state in state_per_layer_list
)
# Alternative without a feedable state:
# init_state = stacked_lstm.zero_state(batch_size, tf.float32)

# Unroll the stacked LSTM over the truncated sequence, one time step at a time.
current_state = rnn_tuple_state
states_series = []
with tf.variable_scope('rnn') as vs:
    for step, current_input in enumerate(inputs_series):
        if step > 0:
            # The first step created the cell weights; every later step must
            # share them instead of trying to create them again.
            vs.reuse_variables()
        output, current_state = stacked_lstm(current_input, current_state)
        # Keep the per-layer state of every step; the output layer reads it.
        states_series.append(current_state)

# Output-layer parameters: one [state_size, num_classes] weight matrix for
# each of the two state tensors of the last LSTM layer, plus one shared bias.
W1 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# One logits tensor per time step, computed from both tensors of the last
# layer's state tuple (state[-1]).
logits_series = [
    tf.matmul(state[-1][0], W1) + tf.matmul(state[-1][1], W2) + b2
    for state in states_series
]
# Turn every step's logits into class probabilities.
predictions_series = list(map(tf.nn.softmax, logits_series))

# Cross-entropy between each step's logits and that step's integer labels,
# then averaged over all steps and batch entries.
losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=step_logits, labels=step_labels)
    for step_logits, step_labels in zip(logits_series, labels_series)
]
total_loss = tf.reduce_mean(losses)

# Adagrad with a learning rate of 0.3.
optimizer = tf.train.AdagradOptimizer(0.3)
train_step = optimizer.minimize(total_loss)

def plot(loss_list, predictions_series, batchX, batchY):
    """Redraw the training-progress figure.

    Panel 1 of a 2x3 grid shows the loss history. The remaining panels show,
    for up to five sequences of the batch, the input (blue), the target
    scaled by 0.5 (red) and the predicted class scaled by 0.3 (green).

    Args:
        loss_list: Loss values collected so far.
        predictions_series: Per-time-step class probabilities; after
            conversion to an array it is indexed as
            [time_step, batch_entry, class].
        batchX: Input batch, indexed as [batch_entry, time_step].
        batchY: Target batch, indexed as [batch_entry, time_step].
    """
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    # Convert once up front rather than once per panel.
    predictions = np.array(predictions_series)
    left_offset = range(truncated_backprop_length)
    # The grid has room for five sequences; draw fewer if the batch is smaller
    # instead of indexing past the end of the batch.
    num_panels = min(5, len(batchX))

    for batch_series_idx in range(num_panels):
        # Predicted class is 1 wherever the probability of class 0 is below 0.5.
        class0_prob = predictions[:, batch_series_idx, 0]
        single_output_series = (class0_prob < 0.5).astype(int)

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    # Short pause so the GUI event loop gets a chance to refresh the window.
    plt.pause(0.0001)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Interactive mode lets the figure refresh while training is running.
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    # Tensors/ops evaluated on every training step.
    fetches = [total_loss, train_step, current_state, predictions_series]

    for epoch_idx in range(num_epochs):
        x, y = generateData()

        # Every epoch starts from an all-zero LSTM state.
        _current_state = np.zeros((num_layers, 2, batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            # Column window covered by this truncated-backprop batch.
            start_idx = batch_idx * truncated_backprop_length
            window = slice(start_idx, start_idx + truncated_backprop_length)
            batchX = x[:, window]
            batchY = y[:, window]

            # One optimisation step; the fetched final state is fed back in
            # as the initial state of the next batch.
            feed = {
                batchX_placeholder: batchX,
                batchY_placeholder: batchY,
                init_state: _current_state,
            }
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                fetches, feed_dict=feed)

            loss_list.append(_total_loss)

            # Report and redraw only on every 100th batch.
            if batch_idx % 100 != 0:
                continue
            print("Step",batch_idx, "Batch loss", _total_loss)
            plot(loss_list, _predictions_series, batchX, batchY)

# Switch interactive mode off and show the figure one final time.
plt.ioff()
plt.show()

参考文献：

    原文作者：深度学习入门
    原文地址: https://www.jianshu.com/p/b3c7883e3ddf
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。