# Cache prediction: seq2seq (s2s) code
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import numpy as np
import helpers
# Load the raw request traces.
# NOTE(review): encoding="latin1" suggests the file may contain pickled
# objects; if so, recent NumPy releases would also need allow_pickle=True.
# Confirm against the data file before changing the call.
dataset1 = np.load('E:/PyCharm/Agua_Code/181225zhu-master_deepcaching/dataset1_v3.npy', encoding="latin1")

N = 80  # number of time series
N_interval = 1000  # length of each time series
d = 50  # number of unique objects
m = 20  # length of the input sample sequence
k = 10  # length of the predicted future sequence
N_train = 60  # leading series used for training; the rest (up to N) are held out

# Ni[s, o] counts how often object o occurs in the first N_interval entries
# of series s. Values outside 0..d-1 match no column and are ignored.
Ni = np.zeros((N, d), dtype=int)
object_ids = np.arange(d)
for ii in range(N):
    a = np.asarray(dataset1[ii])[:N_interval]
    # Broadcast the series against all object ids and count matches per id,
    # instead of looping over every (object, position) pair in Python.
    Ni[ii] = (a[:, None] == object_ids).sum(axis=0)

# Relative popularity: occurrence count divided by the series length.
popu = (Ni / N_interval).tolist()
# Plain NumPy is enough to build the (N, d) array; no TensorFlow session is
# needed. float32 is the dtype TensorFlow assigns to Python floats by default.
popularity = np.asarray(popu, dtype=np.float32).reshape(-1, d)

one_train = popularity[:N_train, :]
one_test = popularity[N_train:N, :]  # test data is not processed any further yet

# Convert the training rows to a list and double it twice (4x the rows), so the
# sliding window used for training has enough rows to move over.
one_train_seq = one_train.tolist()
one_train_seq += one_train_seq
one_train_seq += one_train_seq
train_seq = one_train_seq
# --- Hyper-parameters ---
EOS = 1
vocab_size = 60  # original author's note: change to 10
input_embedding_size = 50  # embedding dimension
encoder_hidden_units = decoder_hidden_units = 128


def _id_placeholder(name):
    """Return an int32 placeholder of shape (None, None) with the given op name."""
    return tf.placeholder(dtype=tf.int32, shape=(None, None), name=name)


# NOTE(review): these placeholders are int32, but the training loop later in
# this file feeds them rows of train_seq, which hold fractional popularity
# values. Confirm that integer ids are really what is intended here.
encoder_inputs = _id_placeholder('encoder_inputs')
decoder_targets = _id_placeholder('decoder_targets')
decoder_inputs = _id_placeholder('decoder_inputs')

# One embedding table, shared by the encoder and decoder lookups.
_initial_embeddings = tf.truncated_normal(
    [vocab_size, input_embedding_size], mean=0.0, stddev=0.1
)
embeddings = tf.Variable(_initial_embeddings, dtype=tf.float32)
encoder_inputs_embedded = tf.nn.embedding_lookup(params=embeddings, ids=encoder_inputs)
decoder_inputs_embedded = tf.nn.embedding_lookup(params=embeddings, ids=decoder_inputs)
# ENCODER
lstm_layers = 4
# Build a separate LSTM cell for every layer. `[cell] * lstm_layers` would put
# the SAME cell object into each layer, so the layers would try to share one
# set of weights; TensorFlow >= 1.1 rejects or mis-handles that reuse.
encoder_cells = [
    tf.contrib.rnn.BasicLSTMCell(encoder_hidden_units) for _ in range(lstm_layers)
]
encoder_cell = encoder_cells[0]  # keeps the original module-level name available
cell = tf.contrib.rnn.MultiRNNCell(encoder_cells)
# With time_major=True the input must be a Tensor of shape
# [max_time, batch_size, ...] (or a nested tuple of such elements).
encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
    cell, encoder_inputs_embedded, dtype=tf.float32, time_major=True,
)
del encoder_outputs  # the per-step outputs are not used; drop the name
# DECODER
# Build a separate LSTM cell for every layer. `[cell] * lstm_layers` would put
# the SAME cell object into each layer, so the layers would try to share one
# set of weights; TensorFlow >= 1.1 rejects or mis-handles that reuse.
decoder_cells = [
    tf.contrib.rnn.BasicLSTMCell(decoder_hidden_units) for _ in range(lstm_layers)
]
decoder_cell = decoder_cells[0]  # keeps the original module-level name available
decoder = tf.contrib.rnn.MultiRNNCell(decoder_cells)
# The decoder starts from the encoder's final state and runs in its own
# variable scope, "plain_decoder".
decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(
    decoder, decoder_inputs_embedded,
    initial_state=encoder_final_state,
    dtype=tf.float32, time_major=True, scope="plain_decoder",
)
# Linear projection of every decoder output onto vocab_size units; with
# activation_fn=None the result is raw logits.
decoder_logits = tf.contrib.layers.fully_connected(
    inputs=decoder_outputs,
    num_outputs=vocab_size,
    activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
    biases_initializer=tf.zeros_initializer(),
)

# Predicted id per step: argmax along axis 2 of the logits.
decoder_prediction = tf.argmax(decoder_logits, 2)

# Cross entropy between the one-hot encoded targets and the logits,
# averaged into a single scalar loss.
_targets_one_hot = tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32)
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=decoder_logits,
    labels=_targets_one_hot,
)
loss = tf.reduce_mean(stepwise_cross_entropy)

# Adam with its default settings.
_optimizer = tf.train.AdamOptimizer()
train_op = _optimizer.minimize(loss)
# Disabled code: the block below is a bare string literal, so none of it runs.
# Its text opens a session, initialises the variables, and for i in range(57)
# runs decoder_prediction on slices of train_seq and prints the predictions.
'''with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(57):
batch_ = train_seq[i:i+2]
din_ = train_seq[i+2:i+3]
pred_ = sess.run(decoder_prediction,
feed_dict={
encoder_inputs: batch_,
decoder_inputs: din_,
# learn_rate:0.1,
})
print('decoder predictions:\n' + str(pred_))
print("build graph ok!")
'''
# Useful: uses a for loop.
# Disabled code: everything between the triple quotes below is a string
# literal and never executes. Its text defines get_batches() and next_feed()
# and runs a training loop of max_batches = 3001 steps that reports every
# batches_in_epoch = 1000 steps.
'''
batch_size = 60
batches = one_train
# batches = helpers.random_sequences(length_from=3, length_to=8,
# vocab_lower=2, vocab_upper=10,
# batch_size=batch_size)
def get_batches(sources, targets, batch_size):
"""
获取batch
"""
for batch_i in range(0, len(sources) // batch_size):
start_i = batch_i * batch_size
# Slice the right amount for the batch
sources_batch = sources[start_i:start_i + batch_size]
targets_batch = targets[start_i:start_i + batch_size]
# Need the lengths for the _lengths parameters
targets_lengths = []
for target in targets_batch:
targets_lengths.append(len(target))
source_lengths = []
for source in sources_batch:
source_lengths.append(len(source))
yield sources_batch, targets_batch, source_lengths, targets_lengths
def next_feed():
batch = next(batches)
encoder_inputs_, _ = helpers.batch(batch)
decoder_targets_, _ = helpers.batch(
[(sequence) + [EOS] for sequence in batch]
)
decoder_inputs_, _ = helpers.batch(
[[EOS] + (sequence) for sequence in batch]
)
return {
encoder_inputs: encoder_inputs_,
decoder_inputs: decoder_inputs_,
decoder_targets: decoder_targets_,
}
loss_track = []
max_batches = 3001
batches_in_epoch = 1000
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
try:
for batch in range(max_batches):
fd = next_feed() # 获取下一个batch
_, l = sess.run([train_op, loss], fd)
loss_track.append(l)
if batch == 0 or batch % batches_in_epoch == 0:
print('batch {}'.format(batch))
print(' minibatch loss: {}'.format(sess.run(loss, fd)))
predict_ = sess.run(decoder_prediction, fd)
for i, (inp, pred) in enumerate(zip(fd[encoder_inputs].T, predict_.T)):
print(' sample {}:'.format(i + 1))
print(' input > {}'.format(inp))
print(' predicted > {}'.format(pred))
if i >= 2:
break
print()
except KeyboardInterrupt:
print('training interrupted')
'''
# Live assignment. NOTE(review): batch_size is not referenced by the training
# loop visible later in this file — confirm whether it is still needed.
batch_size = 100
# batches = iter(train_seq)
# Disabled code: the string literal below holds the text of a get_batches()
# generator (described inside as a planned replacement for next_feed); it is
# never executed.
'''
#准备代替函数next_feed
def get_batches(sources, targets, batch_size):
"""
获取batch
"""
for batch_i in range(0, len(sources) // batch_size):
start_i = batch_i * batch_size
# Slice the right amount for the batch
sources_batch = sources[start_i:start_i + batch_size]
targets_batch = targets[start_i:start_i + batch_size]
# Need the lengths for the _lengths parameters
targets_lengths = []
for target in targets_batch:
targets_lengths.append(len(target))
source_lengths = []
for source in sources_batch:
source_lengths.append(len(source))
yield sources_batch, targets_batch, source_lengths, targets_lengths
'''
# Disabled code: a string literal holding the text of a next_feed() helper that
# takes the next item from `batches` and uses it for the encoder inputs,
# decoder inputs and decoder targets alike. It is never executed.
'''def next_feed():
batch = next(batches)
encoder_inputs_ = batch
decoder_targets_ = [(sequence) for sequence in batch]
decoder_inputs_ = [(sequence) for sequence in batch]
return {
encoder_inputs: encoder_inputs_,
decoder_inputs: decoder_inputs_,
decoder_targets: decoder_targets_,
}
'''
loss_track = []  # loss value returned by every training step, in order
max_batches = 201  # number of training steps
batches_in_epoch = 100  # a progress report is printed every this many steps

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    try:
        # j is the first row of a window that slides one row per step over
        # train_seq. For every window to be full, train_seq needs at least
        # (max_batches - 1) + m + k rows.
        j = 0
        for batch in range(max_batches):
            # m rows go to the encoder; the following k rows are used both as
            # decoder input and as decoder target.
            encoder_window = train_seq[j:j + m]
            decoder_window = train_seq[j + m:j + m + k]
            # Build the feed once and reuse it for the training step and the
            # progress report instead of re-slicing train_seq for each run.
            fd = {
                encoder_inputs: encoder_window,
                decoder_inputs: decoder_window,
                decoder_targets: decoder_window,
            }
            _, step_loss = sess.run([train_op, loss], fd)
            loss_track.append(step_loss)

            if batch % batches_in_epoch == 0:  # also true for batch 0
                print('batch {}'.format(batch))
                # Loss re-evaluated on the same window after the update above.
                print(' minibatch loss: {}'.format(sess.run(loss, fd)))
                predict_ = sess.run(decoder_prediction, fd)
                # Transpose so that each printed sample is one column of the window.
                samples = zip(np.array(encoder_window).T, predict_.T)
                for i, (inp, pred) in enumerate(samples):
                    print(' sample {}:'.format(i + 1))
                    print(' input > {}'.format(inp))
                    print(' predicted > {}'.format(pred))
                    if i >= 2:  # show at most three samples
                        break
                print()
            j += 1
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop training early; the losses collected so far
        # are still plotted below.
        print('training interrupted')

import matplotlib.pyplot as plt

plt.plot(loss_track)
plt.ylabel('loss')  # label the y axis
plt.show()
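# Trailing notes, apparently left over from the web page this script was copied from:
# - OCR can skip the embedding layer and feed its input directly into the RNN.
# - Python image processing: recognising text in images (OCR)
# - Reader comments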