RNN:
LSTM
LSTM有两条线,分为主线和分线:主线(细胞状态)是对分线中有用信息的逐步累加。
LSTM有三个门:输入门、输出门、遗忘门。输入门即图所示的write,用于对当前的输入x以及上一个单元输出的隐藏状态h(t-1)进行加工,提取重要信息。遗忘门即图所示的forget,用于对细胞状态进行更新,舍去不想要的内容。输出门即read,用于得到输出结果,包括更新后的细胞状态以及传向下一层的隐藏状态。
当分线信息不重要时,输入门将分线信息忽略。遗忘门决定要不要忘记主线中的旧内容:当分线带来的新信息更重要时,遗忘门便让主线忘记部分旧内容,为新信息腾出位置。
根据b站莫烦课程,简单RNN代码,有些代码还是不理解,需要后续进一步看
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# this is data
mnist = input_data.read_data_sets('MNIST_data', one_hot='True')
# hyperparameters
lr = 0.001
training_iters = 100000
batch_size = 128
n_inputs = 28 # MNIST data input (img shape:28*28) 一行的像素
n_steps = 28 # time steps 一共28行的
n_hidden_units = 128 # neurons in hidden layer
n_classes = 10 # MNIST classes(0-9 digits)
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Define weight
weights = {
# (28,128)
'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
# (128,10)
'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
# (128,)
'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
# (10,)
'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}
def RNN(X, weight, biases):
# hidden layer for input to cell
# X(128batch,28 steps,28 inputs)
# ==>>(128*28,28inputs)
X = tf.reshape(X, [-1, n_inputs]) # -1是把数据全部读取
# X_in==>>(128batch*28steps,128hidden)
X_in = tf.matmul(X, weights['in']) + biases['in']
# X==>>(128batch,28steps,128hidden )
X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
# cell
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
# lstm cell is divided into two parts (c_state,m_state)
_init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
outputs, states = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=_init_state,
time_major=False) # tf.nn.dynmaic_rnn优点在于对尺度不相同的数据的处理上,会减少计算量。
# time_major是steps,如果在X_in中处于第一个,则True。
# hidden layer for output as the final results
results = tf.matmul(states[1], weight['out']) + biases['out'] # states[0]是c_state,是主线剧情。states[1]是m——state,是分线剧情
# 在当前例子中,states[1]=output[-1]
# # or
# # unpack to list [(batch,outputs)……]*steps
# outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2])) # states is the last outputs 原本是tf.unpack
# #outputs变成有steps个元素,因为要取output最后一个,所以要按照step依次把三维数组展开,得到列表里取倒数第一个,就是计算results的输入
# #最后一个output是看完所有行之后的总结。
# results = tf.matmul(outputs[-1], weight['out']) + biases['out'] # 上一步是将其展开,为了取outputs[-1]
return results
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
step = 0
while step * batch_size < training_iters:
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
sess.run([train_op], feed_dict={x: batch_xs,
y: batch_ys})
if step % 20 == 0:
print(sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys}))
step += 1



