TensorFlow practice series:
TensorFlow practice 1 - DNN
Now let's try using an LSTM to predict periodic data.
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from matplotlib import pyplot as plt
from sklearn import preprocessing, model_selection
import pandas as pd

input_vec_size = 1   # dimension of each input vector
lstm_size = 10       # number of hidden units in the LSTM cell
time_step_size = 5   # number of time steps unrolled by the recurrent layer
batch_size = 7
test_size = 3
1. Prepare the data
We use synthetic data.
day = (time_step_size + 1)* 200
week_rate = [0.9, 0.85, 0.80, 0.88, 1.1, 1.2, 1.15]
label = [(1 + i * 0.002) * week_rate[i%7] for i in range(day)]
label = np.array(label)
# scaler = preprocessing.StandardScaler()
# label = scaler.fit_transform(label)
label = label.reshape(int(day / (time_step_size + 1)), (time_step_size + 1))
print(label.shape)
_tmp = label
X_ = _tmp[:, :time_step_size]
Y_ = _tmp[:, time_step_size:]
print(_tmp.shape, X_.shape, Y_.shape)
plt.plot(_tmp[:100, 0])
plt.show()
The output:
(200, 6)
(200, 6) (200, 5) (200, 1)
[Figure: the periodic data]
2. Build the network
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, W, B, lstm_size):
    # X, input shape: (batch_size, time_step_size, input_vec_size)
    print(X.shape)
    # XT shape: (time_step_size, batch_size, input_vec_size)
    XT = tf.transpose(X, [1, 0, 2])
    # XR shape: (time_step_size * batch_size, input_vec_size); each row is the input for one cell step
    XR = tf.reshape(XT, [-1, input_vec_size])
    # Split into time_step_size arrays, each of shape (batch_size, input_vec_size)
    X_split = tf.split(XR, time_step_size, 0)
    # Make an LSTM cell with num_units=lstm_size hidden units; forget_bias=1.0
    lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)
    # rnn.static_rnn() returns one output per time step, each of shape (batch_size, lstm_size);
    # if we only care about the final step, outputs[-1] is enough
    outputs, _states = rnn.static_rnn(lstm, X_split, dtype=tf.float32)
    # Linear activation on the last output
    return tf.matmul(outputs[-1], W) + B, lstm.state_size  # state size, to initialize the state
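As a side note (this variant is not in the original code): the transpose/reshape/split steps above are only needed for rnn.static_rnn; tf.nn.dynamic_rnn consumes the (batch_size, time_step_size, input_vec_size) tensor directly. A minimal equivalent sketch:

def model_dynamic(X, W, B, lstm_size):
    # X shape: (batch_size, time_step_size, input_vec_size); no transpose/split needed
    lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)
    # outputs shape: (batch_size, time_step_size, lstm_size)
    outputs, _states = tf.nn.dynamic_rnn(lstm, X, dtype=tf.float32)
    # keep only the last time step and apply the linear layer
    return tf.matmul(outputs[:, -1, :], W) + B, lstm.state_size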
trX, teX, trY, teY = model_selection.train_test_split(X_, Y_, test_size=0.3)
print(trX.shape, trY.shape, teX.shape, teY.shape)
trX = trX.reshape(-1, time_step_size, 1)
teX = teX.reshape(-1, time_step_size, 1)
trY = trY.reshape(-1, 1)
teY = teY.reshape(-1, 1)
X = tf.placeholder("float", [None, time_step_size, 1])
Y = tf.placeholder("float", [None, 1])
# map the lstm_size-dimensional last output to one predicted value
W = init_weights([lstm_size, 1])
B = init_weights([1])
py_x, state_size = model(X, W, B, lstm_size)
loss = tf.reduce_mean(tf.square(py_x - Y))
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)
predict_op = py_x
session_conf = tf.ConfigProto()
session_conf.gpu_options.allow_growth = True
3. Train the model
# Launch the graph in a session
with tf.Session(config=session_conf) as sess:
    # you need to initialize all variables
    tf.global_variables_initializer().run()
    for i in range(5000):
        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX) + 1, batch_size)):
            # print("feed:", trX[start:end][0], trY[start:end][0])
            # print("feed:", trX[start:end].shape, trY[start:end].shape)
            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
        if i % 100 == 0:
            print(i, sess.run(loss, feed_dict={X: trX, Y: trY}), sess.run(loss, feed_dict={X: teX, Y: teY}))
    predic = sess.run(predict_op, feed_dict={X: X_.reshape(-1, time_step_size, 1), Y: Y_.reshape(-1, 1)})
The training log (step, train loss, test loss):
0 0.708224 0.584213
100 0.000834472 0.000973581
200 0.000377034 0.000441525
300 0.0111581 0.0139003
400 0.000116222 0.000156086
500 0.000468893 0.000493862
600 0.000166236 0.000181855
700 4.5977e-05 7.10702e-05
800 0.0001364 0.000223403
900 5.61442e-05 7.7883e-05
1000 4.72553e-05 6.94315e-05
1100 0.00035655 0.000320759
1200 1.88308e-05 3.37516e-05
1300 1.21966e-05 2.17105e-05
1400 4.73054e-05 7.62947e-05
1500 7.19001e-06 2.29275e-05
1600 0.00010293 0.000149464
1700 3.69292e-05 4.97202e-05
1800 1.507e-05 2.25646e-05
1900 1.81654e-05 3.16758e-05
2000 6.29222e-06 1.25351e-05
2100 7.36748e-05 0.000105863
2200 9.74285e-06 1.90102e-05
2300 5.93037e-05 7.35179e-05
2400 0.000723914 0.000536784
2500 3.20104e-06 8.17306e-06
2600 3.83796e-06 1.01513e-05
2700 5.5799e-06 1.48322e-05
2800 3.92063e-06 1.29322e-05
2900 3.68061e-06 1.5905e-05
3000 2.65767e-06 1.1614e-05
3100 3.09019e-06 1.39719e-05
3200 0.000264612 0.000287502
3300 4.02342e-05 4.46263e-05
3400 2.33401e-05 3.36289e-05
3500 8.55672e-05 0.000107355
3600 3.20796e-05 4.43166e-05
3700 1.7802e-05 2.81231e-05
3800 1.15583e-05 1.70578e-05
3900 1.40946e-05 2.29459e-05
4000 1.16366e-05 1.92793e-05
4100 4.93991e-06 1.29583e-05
4200 2.57865e-05 4.40378e-05
4300 6.99072e-06 1.18626e-05
4400 3.02546e-06 7.17427e-06
4500 6.18525e-06 1.39441e-05
4600 0.000154743 0.000180568
4700 7.82577e-05 7.30797e-05
4800 5.27492e-05 6.85774e-05
4900 1.77607e-05 2.66213e-05
Take a look at the resulting predictions:
plt.plot(Y_[:100])
plt.plot(predic[:100])
plt.show()
# print(scaler.mean_)
print(np.mean(np.square(Y_ - predic)))
# print(np.hstack([scaler.inverse_transform(Y_), scaler.inverse_transform(predic)]))
print(np.hstack([X_, Y_, predic])[:20])
We get:
[Figure: predicted vs. actual values, basically as expected]
9.2249199079e-05
[[ 0.9 0.8517 0.8032 0.88528 1.1088 1.212
1.21290922]
[ 1.1638 0.9126 0.8636 0.8144 0.8976 1.1242
1.12351131]
[ 1.2288 1.1799 0.9252 0.8755 0.8256 0.90992
0.90445042]
[ 1.1396 1.2456 1.196 0.9378 0.8874 0.8368
0.82641059]
[ 0.92224 1.155 1.2624 1.2121 0.9504 0.8993
0.8717519 ]
[ 0.848 0.93456 1.1704 1.2792 1.2282 0.963
0.95084727]
[ 0.9112 0.8592 0.94688 1.1858 1.296 1.2443
1.22580171]
[ 0.9756 0.9231 0.8704 0.9592 1.2012 1.3128
1.31333256]
[ 1.2604 0.9882 0.935 0.8816 0.97152 1.2166
1.2210536 ]
[ 1.3296 1.2765 1.0008 0.9469 0.8928 0.98384
0.97711486]
[ 1.232 1.3464 1.2926 1.0134 0.9588 0.904
0.89058191]
[ 0.99616 1.2474 1.3632 1.3087 1.026 0.9707
0.95494765]
[ 0.9152 1.00848 1.2628 1.38 1.3248 1.0386
1.02744293]
[ 0.9826 0.9264 1.0208 1.2782 1.3968 1.3409
1.33157599]
[ 1.0512 0.9945 0.9376 1.03312 1.2936 1.4136
1.41023409]
[ 1.357 1.0638 1.0064 0.9488 1.04544 1.309
1.31301975]
[ 1.4304 1.3731 1.0764 1.0183 0.96 1.05776
1.05260563]
[ 1.3244 1.4472 1.3892 1.089 1.0302 0.9712
0.95847595]
[ 1.07008 1.3398 1.464 1.4053 1.1016 1.0421
1.03257525]
[ 0.9824 1.0824 1.3552 1.4808 1.4214 1.1142
1.10492337]]
Further visualization of the results:
mean_ = np.mean(Y_)
plt.plot((predic - Y_)/mean_ * 100)
plt.show()
print("mean", mean_)
[Figure: prediction error, in %]
mean 2.166021
4. Summary
- The earlier version of this code overfit; increasing the number of samples alleviated it (always print both the train and the test loss).
- The earlier version also fit poorly, which was related to the sample values being spread too widely; some form of normalization should be added around the LSTM input so it can cope with a wider range of data distributions (a sketch follows this list). The current result owes a lot to the data distribution happening to be well-behaved.
- Adam seems to be a good choice. I also tried RMSProp, but it did not work as well; the momentum seemed a bit excessive.
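For the normalization point above, one option (sketch only; apart from the commented-out scaler lines in the code it is not part of the original post) is to standardize the series before windowing and convert the loss back to the original scale afterwards:

scaler = preprocessing.StandardScaler()
# StandardScaler expects a 2-D array, hence the reshape
label_scaled = scaler.fit_transform(label.reshape(-1, 1)).reshape(-1)
# ... build X_ / Y_ from label_scaled and train exactly as above ...
# an MSE computed on the scaled data maps back to the original units via scale_ ** 2:
# loss_original = loss_scaled * scaler.scale_[0] ** 2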
Thoughts
- The predictions come out periodic as well; is that a sign that the network failed to learn something?
- How should we judge the current result?
print("当前的预测结果", np.mean(np.square(predic-Y_)))
#对比, 假设网络什么都没有学习到, 那么直接用X_最后一个作为预测结果
print("直接用上一个值作为预测结果", np.mean(np.square(X_[:, -1] - Y_)))
print("学习到了一点点的大势", np.mean(np.square(X_[:, -1] + 0.002 -Y_)))
current prediction 1.69001122001e-05
previous value as prediction 1.96738985325
previous value plus a bit of the trend 1.96737203849
- Judging from the numbers above, the network has indeed learned something.
- But note that the training data above was grouped with a period of 6, while the underlying weekly pattern has period 7. Would the result improve if we changed the grouping period to 7? The answer is yes: the loss reached 1.53918561546e-07, whereas with a period of 5 + 1 it was 1.07650830461e-05. Our guess is that the network can then simply learn "take the last value, rescale it by / 1.2 * 1.15, and correct for the 0.002-per-step growth" to produce the answer; a rough check of this guess follows. Can we, then, modify the network so that it also reaches an e-7 level loss when the grouping period does not match 6 + 1?
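A quick hand-crafted check of that guess (sketch only; it assumes the data were regenerated with time_step_size = 6, so that X_[:, -1] always falls on week_rate[5] = 1.2 and Y_ on week_rate[6] = 1.15):

# rescale the last observed day to the target day of the week
naive = X_[:, -1:] * 1.15 / 1.2
# crude correction for the slow upward trend (0.002 per step)
naive = naive * (1 + 0.002)
print("hand-crafted baseline MSE", np.mean(np.square(naive - Y_)))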
- Feeding the current day of the week in as a feature, the loss did not drop to the e-6 level until around step 2900; is there still room for optimization here? A sketch of how the weekday could be added as an input feature follows.
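One possible way to add that feature (sketch only; the variable names are mine, not from the post) is to stack the day-of-week index next to each value, so that input_vec_size becomes 2:

# day-of-week index for every raw data point, windowed the same way as the values
weekday = np.array([i % 7 for i in range(day)], dtype=np.float32)
weekday = weekday.reshape(int(day / (time_step_size + 1)), time_step_size + 1)[:, :time_step_size]
# stack value and weekday along the last axis: shape (num_samples, time_step_size, 2)
X_feat = np.stack([X_, weekday], axis=-1)
# the placeholder (and input_vec_size) would change accordingly:
# X = tf.placeholder("float", [None, time_step_size, 2])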
- Right now, with 200 × 7 data points, we actually only use 200 groups for training and validation, taking exactly 7 values per group. From n data points we could in fact extract n - 7 + 1 overlapping groups; a sliding-window sketch follows, and an update on this is planned. As it turns out, simply switching to the larger set of groups made the loss higher instead, around the 1.6e-3 level. After standardizing the input data the loss came to 1.5e-3 ~ 1.9e-4 (converted back via scaler.scale_ ** 2); the loss is somewhat lower, but the effect is not particularly significant.
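A minimal sketch of the overlapping grouping described above (the helper name is mine, not from the post):

def sliding_windows(series, window):
    # series: 1-D array; window = time_step_size + 1 (inputs plus one target)
    samples = np.array([series[i:i + window] for i in range(len(series) - window + 1)])
    return samples[:, :-1], samples[:, -1:]

X_all, Y_all = sliding_windows(label.reshape(-1), time_step_size + 1)
print(X_all.shape, Y_all.shape)  # (day - time_step_size, time_step_size) and (day - time_step_size, 1)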
5. Improvements
The results are still not ideal; once I understand things more deeply and have time, I will improve the model in a follow-up.