- 问题描述
基于历史的20个通道,预测3个通道未来的值。
- 流程
1.时间序列数据预处理:利用pandas.DataFrame的shift函数
# Shift the target columns up by `shift_steps` rows (for a positive
# `shift_steps`), so each row is paired with the target values observed
# `shift_steps` rows later. The last `shift_steps` rows of the result have
# no data to shift in and become NaN.
df_targets = df[target_city][target_names].shift(-shift_steps)
2.归一化数据
# Fit the scaler on the training inputs and scale them in a single step.
# NOTE(review): MinMaxScaler is presumably scikit-learn's, which rescales
# each column to [0, 1] by default -- confirm against the file's imports.
x_scaler = MinMaxScaler()
x_train_scaled = x_scaler.fit_transform(x_train)
3.batch数据生成器(data generator)
def batch_generator(batch_size, sequence_length):
    """
    Yield random training batches forever.

    Every batch contains `batch_size` windows of `sequence_length`
    consecutive rows. For each window the same random start offset is used
    to slice both `x_train_scaled` and `y_train_scaled`, so inputs and
    targets stay aligned in time.

    Relies on the module-level names `num_x_signals`, `num_y_signals`,
    `num_train`, `x_train_scaled` and `y_train_scaled`.

    :param batch_size: Number of sequences per batch.
    :param sequence_length: Number of time-steps in each sequence.
    :return: Infinite generator of `(x_batch, y_batch)` tuples of float16
        arrays shaped `(batch_size, sequence_length, num_x_signals)` and
        `(batch_size, sequence_length, num_y_signals)`.
    :raises ValueError: From `np.random.randint` on the first `next()` if
        `num_train - sequence_length` is not positive.
    """
    # The batch shapes never change, so compute them once.
    x_shape = (batch_size, sequence_length, num_x_signals)
    y_shape = (batch_size, sequence_length, num_y_signals)

    while True:
        # Allocate fresh arrays for every batch so that a batch already
        # handed to the consumer is never overwritten by the next one.
        x_batch = np.zeros(shape=x_shape, dtype=np.float16)
        y_batch = np.zeros(shape=y_shape, dtype=np.float16)

        for i in range(batch_size):
            # Random start offset, drawn from [0, num_train - sequence_length).
            idx = np.random.randint(num_train - sequence_length)

            # Copy the same window from the inputs and from the targets.
            x_batch[i] = x_train_scaled[idx:idx + sequence_length]
            y_batch[i] = y_train_scaled[idx:idx + sequence_length]

        yield (x_batch, y_batch)
# Draw a single batch to sanity-check the generator's output shapes.
batch_size = 32
# 24 * 7 * 8 = 1344 time-steps per sequence
# (presumably 8 weeks of hourly observations -- TODO confirm sampling rate).
sequence_length = 24 * 7 * 8
generator = batch_generator(batch_size=batch_size,
sequence_length=sequence_length)
x_batch, y_batch = next(generator)
print(x_batch.shape)
print(y_batch.shape)
# Expected output, as (batch, time-steps, signals):
# (32, 1344, 20)
# (32, 1344, 3)
# Validation set: the scaled test inputs and targets, each given a leading
# batch axis so the whole series is passed as one batch of one sequence.
validation_data = tuple(
    np.expand_dims(series, axis=0)
    for series in (x_test_scaled, y_test_scaled)
)
4.RNN模型
# Recurrent model: one GRU layer followed by a dense output layer.
model = Sequential([
    # `return_sequences=True` emits an output at every time-step, matching
    # the per-time-step targets produced by the batch generator.
    # `None` in `input_shape` leaves the sequence length unspecified.
    GRU(units=512,
        return_sequences=True,
        input_shape=(None, num_x_signals)),
    # One output per target signal. Sigmoid bounds the outputs to (0, 1),
    # presumably to match targets that were min-max scaled -- confirm.
    Dense(num_y_signals, activation="sigmoid"),
])
5.损失函数,只计算模型在预热周期之后的损失
# Number of leading time-steps of every sequence excluded from the loss.
warmup_steps = 50


def loss_mse_warmup(y_true, y_pred):
    """
    Mean squared error between `y_true` and `y_pred`, skipping the warm-up.

    The first `warmup_steps` time-steps of every sequence are sliced off
    before the error is computed, so only the steps after the warm-up
    period contribute to the loss.

    Uses the `tf.losses.mean_squared_error(labels=..., predictions=...)`
    call form, so it needs a TensorFlow version that still provides it.

    :param y_true: Desired outputs, indexed as [batch, time-step, signal].
    :param y_pred: Model outputs, indexed the same way as `y_true`.
    :return: The loss reduced to a single scalar tensor.
    """
    # Drop the warm-up period from both tensors (second axis = time-steps).
    y_true_slice = y_true[:, warmup_steps:, :]
    y_pred_slice = y_pred[:, warmup_steps:, :]

    loss = tf.losses.mean_squared_error(labels=y_true_slice,
                                        predictions=y_pred_slice)

    # Reduce to one scalar explicitly, so the result does not depend on
    # how much reduction the MSE op has already performed.
    loss_mean = tf.reduce_mean(loss)

    return loss_mean
6.编译
# RMSprop optimizer with an initial learning rate of 1e-3.
optimizer = RMSprop(lr=1e-3)
# Train against the custom loss, which ignores the warm-up time-steps.
model.compile(loss=loss_mse_warmup, optimizer=optimizer)
7.回调函数Callback Functions
# Checkpoint file. Only the weights are written, and only when the
# validation loss improves on the best value seen so far.
path_checkpoint = '23_checkpoint.keras'
callback_checkpoint = ModelCheckpoint(
    filepath=path_checkpoint,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop training once the validation loss has failed to improve for
# 5 consecutive epochs.
callback_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)

# TensorBoard logging, without histograms or the graph.
callback_tensorboard = TensorBoard(
    log_dir='./23_logs/',
    histogram_freq=0,
    write_graph=False,
)

# Multiply the learning rate by 0.1 as soon as the validation loss stops
# improving (patience=0), but never go below 1e-4.
callback_reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=0,
    min_lr=1e-4,
    verbose=1,
)

callbacks = [
    callback_early_stopping,
    callback_checkpoint,
    callback_tensorboard,
    callback_reduce_lr,
]
7.训练模型
# Train from the batch generator for at most 20 epochs. An "epoch" here is
# 100 randomly drawn batches (steps_per_epoch), not one full pass over the
# training data. The callbacks may end training earlier.
model.fit_generator(generator=generator,
epochs=20,
steps_per_epoch=100,
validation_data=validation_data,
callbacks=callbacks)
8.加载检查点
# Reload the weights stored at `path_checkpoint`, the file the checkpoint
# callback writes to when the validation loss improves.
try:
    model.load_weights(path_checkpoint)
except Exception as error:
    # Deliberately best-effort: report the problem and carry on with the
    # weights currently held by the model rather than aborting.
    print("Error trying to load checkpoint.")
    print(error)
9.结果评价
# Evaluate the loss on the whole scaled test series, passed as a single
# batch of one sequence (leading batch axis added with expand_dims).
result = model.evaluate(x=np.expand_dims(x_test_scaled, axis=0),
y=np.expand_dims(y_test_scaled, axis=0))
print("loss (test-set):", result)
10.生成预测值并作图
def plot_comparison(start_idx, length=100, train=True):
    """
    Plot the predicted and true output-signals, one figure per signal.

    The model is run on a slice of the scaled input data, its output is
    mapped back to the original scale with `y_scaler`, and each target
    signal is plotted against the unscaled true values. The first
    `warmup_steps` steps of each plot are shaded.

    Relies on the module-level names `model`, `y_scaler`, `target_names`,
    `warmup_steps`, `x_train_scaled`, `y_train`, `x_test_scaled`, `y_test`.

    :param start_idx: Start-index for the time-series.
    :param length: Sequence-length to process and plot.
    :param train: Boolean whether to use training- or test-set.
    """
    if train:
        # Use training-data.
        x = x_train_scaled
        y_true = y_train
    else:
        # Use test-data.
        x = x_test_scaled
        y_true = y_test

    # End-index for the sequences.
    end_idx = start_idx + length

    # Select the sequences from the given start-index and
    # of the given length.
    x = x[start_idx:end_idx]
    y_true = y_true[start_idx:end_idx]

    # The model expects a leading batch axis.
    x = np.expand_dims(x, axis=0)

    # Use the model to predict the output-signals.
    y_pred = model.predict(x)

    # The output of the model is between 0 and 1.
    # Do an inverse map to get it back to the scale
    # of the original data-set.
    y_pred_rescaled = y_scaler.inverse_transform(y_pred[0])

    # One figure per output-signal.
    for signal, signal_name in enumerate(target_names):
        # Predicted and true values for this output-signal.
        signal_pred = y_pred_rescaled[:, signal]
        signal_true = y_true[:, signal]

        # Make the plotting-canvas bigger.
        plt.figure(figsize=(15, 5))

        # Plot and compare the two signals.
        plt.plot(signal_true, label='true')
        plt.plot(signal_pred, label='pred')

        # Shade the warmup-period, which the training loss ignores.
        plt.axvspan(0, warmup_steps, facecolor='black', alpha=0.15)

        # Plot labels etc.
        plt.ylabel(signal_name)
        plt.legend()
        plt.show()
# Compare predictions with the true signals on the test-set, for the
# 1000 time-steps starting at index 200.
plot_comparison(start_idx=200, length=1000, train=False)
网友评论