美文网首页
Pytorch LSTM预测趋势

Pytorch LSTM预测趋势

作者: dawsonenjoy | 来源:发表于2019-11-29 20:37 被阅读0次

    数据集-data.csv

    使用文章 https://www.jianshu.com/p/38df71cad1f6 里的数据集(按月统计的航班数变化趋势,从1949年1月到1960年12月,每行格式为“月份-年份,数值”)。这里直接拷贝在下面,保存为 data.csv 即可,文件不带表头:

    Jan-49,112
    Feb-49,118
    Mar-49,132
    Apr-49,129
    May-49,121
    Jun-49,135
    Jul-49,148
    Aug-49,148
    Sep-49,136
    Oct-49,119
    Nov-49,104
    Dec-49,118
    Jan-50,115
    Feb-50,126
    Mar-50,141
    Apr-50,135
    May-50,125
    Jun-50,149
    Jul-50,170
    Aug-50,170
    Sep-50,158
    Oct-50,133
    Nov-50,114
    Dec-50,140
    Jan-51,145
    Feb-51,150
    Mar-51,178
    Apr-51,163
    May-51,172
    Jun-51,178
    Jul-51,199
    Aug-51,199
    Sep-51,184
    Oct-51,162
    Nov-51,146
    Dec-51,166
    Jan-52,171
    Feb-52,180
    Mar-52,193
    Apr-52,181
    May-52,183
    Jun-52,218
    Jul-52,230
    Aug-52,242
    Sep-52,209
    Oct-52,191
    Nov-52,172
    Dec-52,194
    Jan-53,196
    Feb-53,196
    Mar-53,236
    Apr-53,235
    May-53,229
    Jun-53,243
    Jul-53,264
    Aug-53,272
    Sep-53,237
    Oct-53,211
    Nov-53,180
    Dec-53,201
    Jan-54,204
    Feb-54,188
    Mar-54,235
    Apr-54,227
    May-54,234
    Jun-54,264
    Jul-54,302
    Aug-54,293
    Sep-54,259
    Oct-54,229
    Nov-54,203
    Dec-54,229
    Jan-55,242
    Feb-55,233
    Mar-55,267
    Apr-55,269
    May-55,270
    Jun-55,315
    Jul-55,364
    Aug-55,347
    Sep-55,312
    Oct-55,274
    Nov-55,237
    Dec-55,278
    Jan-56,284
    Feb-56,277
    Mar-56,317
    Apr-56,313
    May-56,318
    Jun-56,374
    Jul-56,413
    Aug-56,405
    Sep-56,355
    Oct-56,306
    Nov-56,271
    Dec-56,306
    Jan-57,315
    Feb-57,301
    Mar-57,356
    Apr-57,348
    May-57,355
    Jun-57,422
    Jul-57,465
    Aug-57,467
    Sep-57,404
    Oct-57,347
    Nov-57,305
    Dec-57,336
    Jan-58,340
    Feb-58,318
    Mar-58,362
    Apr-58,348
    May-58,363
    Jun-58,435
    Jul-58,491
    Aug-58,505
    Sep-58,404
    Oct-58,359
    Nov-58,310
    Dec-58,337
    Jan-59,360
    Feb-59,342
    Mar-59,406
    Apr-59,396
    May-59,420
    Jun-59,472
    Jul-59,548
    Aug-59,559
    Sep-59,463
    Oct-59,407
    Nov-59,362
    Dec-59,405
    Jan-60,417
    Feb-60,391
    Mar-60,419
    Apr-60,461
    May-60,472
    Jun-60,535
    Jul-60,622
    Aug-60,606
    Sep-60,508
    Oct-60,461
    Nov-60,390
    Dec-60,432
    

    导入模块

    这个示例比较简单,导入几个常用的就够了:

    import torch
    from torch import nn
    import numpy as np
    import matplotlib.pyplot as plt
    

    初始化定义

    只需要选择运行设备:有可用的GPU(CUDA)时使用GPU,否则使用CPU:

    # Run on the GPU when CUDA is available; otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    

    数据预处理

    需要读入数据,然后用长度为2的滑动窗口构造样本(用连续2个月的数值预测下一个月的数值),最后30个样本作为测试集,其余作为训练集,代码如下:

    # Read the CSV as text; every non-empty line has the form "<Mon-YY>,<count>".
    with open("data.csv", "r", encoding="utf-8") as f:
        data = f.read()
    # splitlines() copes with "\r\n" line endings, and blank lines (such as the
    # one produced by a trailing newline) are skipped so that int(each[1])
    # below cannot fail with an IndexError on an empty row.
    data = [row.split(',') for row in data.splitlines() if row.strip()]

    # One count per month (the labels run Jan-49 ... Dec-60), in file order.
    value = [int(each[1]) for each in data]

    seq = 2
    # Author's note: the data set is small, so a long window would hurt results.
    # Sliding windows: the input is `seq` consecutive months and the target is
    # the month that immediately follows them.
    li_x = [value[i: i + seq] for i in range(len(value) - seq)]
    li_y = [value[i + seq] for i in range(len(value) - seq)]

    # The last `test_size` windows are held out for testing; everything before
    # them is used for training.
    test_size = 30
    # Dividing by `scale` brings the counts (all below 1000) into the range 0..1.
    scale = 1000.

    # Inputs are shaped (batch, seq, 1) and targets (batch, 1).
    train_x = (torch.tensor(li_x[:-test_size]).float() / scale).reshape(-1, seq, 1).to(device)
    train_y = (torch.tensor(li_y[:-test_size]).float() / scale).reshape(-1, 1).to(device)

    test_x = (torch.tensor(li_x[-test_size:]).float() / scale).reshape(-1, seq, 1).to(device)
    test_y = (torch.tensor(li_y[-test_size:]).float() / scale).reshape(-1, 1).to(device)
    

    定义网络模型

    网络模型就使用一个lstm+全连接实现,代码如下:

    class Net(nn.Module):
        """Single-layer LSTM followed by a linear layer that predicts one value.

        Args:
            hidden_size: Number of features in the LSTM hidden state
                (defaults to 32, the value the script has always used).
            seq_len: Length of every input window. When None, the
                module-level ``seq`` is used, so ``Net()`` behaves as before.
        """

        def __init__(self, hidden_size=32, seq_len=None):
            super(Net, self).__init__()
            if seq_len is None:
                # Backward-compatible default: the window length chosen by the script.
                seq_len = seq
            # Each time step carries one feature (input_size=1).
            # Author's note: keep num_layers small for a small data set; a more
            # complex model tends to perform worse. num_layers=2 would stack
            # two LSTM layers one after the other.
            # Without batch_first the input layout is (seq, batch, feature);
            # batch_first=True switches it to (batch, seq, feature).
            self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
            # The linear layer sees the hidden outputs of all time steps at once.
            self.linear = nn.Linear(hidden_size * seq_len, 1)

        def forward(self, x):
            """Map a (batch, seq_len, 1) tensor to (batch, 1) predictions."""
            out, _ = self.lstm(x)
            # Flatten per sample, keeping the batch dimension fixed. A window of
            # unexpected length then fails in the linear layer instead of being
            # silently regrouped across samples.
            out = out.flatten(start_dim=1)
            return self.linear(out)
    

    定义损失函数和优化器

    # Mean-squared-error criterion for the regression target.
    loss_fun = nn.MSELoss()
    # Build the network, then move its parameters to the selected device.
    model = Net()
    model = model.to(device)
    # Adam over all model parameters with a learning rate of 0.003.
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.003)
    

    训练模型

    model.train()
    for epoch in range(300):
        # Full-batch step: the whole training set goes through the model at once.
        output = model(train_x)
        loss = loss_fun(output, train_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report progress every 20 epochs, skipping epoch 0.
        if epoch % 20 == 0 and epoch > 0:
            # The held-out loss is only reported, never back-propagated, so
            # there is no need to record an autograd graph for it.
            with torch.no_grad():
                test_loss = loss_fun(model(test_x), test_y)
            # .item() extracts plain floats so the log shows numbers rather than
            # tensor reprs such as "tensor(..., grad_fn=...)".
            print("epoch:{}, loss:{}, test_loss: {}".format(epoch, loss.item(), test_loss.item()))
    

    测试模型

    通过可视化查看预测趋势和实际趋势的差别:

    model.eval()
    # Inference only: disable autograd, undo the /1000 scaling, and convert the
    # predictions to plain Python floats on the CPU so matplotlib can plot them
    # even when the model runs on a GPU.
    with torch.no_grad():
        train_pred = (model(train_x).reshape(-1) * 1000).cpu().tolist()
        test_pred = (model(test_x).reshape(-1) * 1000).cpu().tolist()
    # The first prediction is for index `seq` (the first `seq` values only ever
    # serve as input), so exactly `seq` real values are prepended to keep the
    # predicted curve aligned with `value` and of the same length.
    result = value[:seq] + train_pred + test_pred
    # Actual series.
    plt.plot(value, label="real")
    # Model predictions (training part followed by the test part).
    plt.plot(result, label="pred")
    plt.legend(loc='best')
    # NOTE(review): outside a notebook, a plt.show() call is needed to display the figure.
    
    测试结果(根据seed的不同,每次跑出的结果可能会有偏差)

    完整代码

    import torch
    from torch import nn
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Run on the GPU when CUDA is available; otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    
    # Read the CSV as text; every non-empty line has the form "<Mon-YY>,<count>".
    with open("data.csv", "r", encoding="utf-8") as f:
        data = f.read()
    # splitlines() copes with "\r\n" line endings, and blank lines (such as the
    # one produced by a trailing newline) are skipped so that int(each[1])
    # below cannot fail with an IndexError on an empty row.
    data = [row.split(',') for row in data.splitlines() if row.strip()]

    # One count per month (the labels run Jan-49 ... Dec-60), in file order.
    value = [int(each[1]) for each in data]

    seq = 2
    # Author's note: the data set is small, so a long window would hurt results.
    # Sliding windows: the input is `seq` consecutive months and the target is
    # the month that immediately follows them.
    li_x = [value[i: i + seq] for i in range(len(value) - seq)]
    li_y = [value[i + seq] for i in range(len(value) - seq)]

    # The last `test_size` windows are held out for testing; everything before
    # them is used for training.
    test_size = 30
    # Dividing by `scale` brings the counts (all below 1000) into the range 0..1.
    scale = 1000.

    # Inputs are shaped (batch, seq, 1) and targets (batch, 1).
    train_x = (torch.tensor(li_x[:-test_size]).float() / scale).reshape(-1, seq, 1).to(device)
    train_y = (torch.tensor(li_y[:-test_size]).float() / scale).reshape(-1, 1).to(device)

    test_x = (torch.tensor(li_x[-test_size:]).float() / scale).reshape(-1, seq, 1).to(device)
    test_y = (torch.tensor(li_y[-test_size:]).float() / scale).reshape(-1, 1).to(device)
    
    class Net(nn.Module):
        """Single-layer LSTM followed by a linear layer that predicts one value.

        Args:
            hidden_size: Number of features in the LSTM hidden state
                (defaults to 32, the value the script has always used).
            seq_len: Length of every input window. When None, the
                module-level ``seq`` is used, so ``Net()`` behaves as before.
        """

        def __init__(self, hidden_size=32, seq_len=None):
            super(Net, self).__init__()
            if seq_len is None:
                # Backward-compatible default: the window length chosen by the script.
                seq_len = seq
            # Each time step carries one feature (input_size=1).
            # Author's note: for short sequences keep num_layers small,
            # otherwise results get worse.
            # Without batch_first the input layout is (seq, batch, feature);
            # batch_first=True switches it to (batch, seq, feature).
            self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
            # The linear layer sees the hidden outputs of all time steps at once.
            self.linear = nn.Linear(hidden_size * seq_len, 1)

        def forward(self, x):
            """Map a (batch, seq_len, 1) tensor to (batch, 1) predictions."""
            out, _ = self.lstm(x)
            # Flatten per sample, keeping the batch dimension fixed. A window of
            # unexpected length then fails in the linear layer instead of being
            # silently regrouped across samples.
            out = out.flatten(start_dim=1)
            return self.linear(out)
            
    # Mean-squared-error criterion for the regression target.
    loss_fun = nn.MSELoss()
    # Build the network, then move its parameters to the selected device.
    model = Net()
    model = model.to(device)
    # Adam over all model parameters with a learning rate of 0.003.
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.003)
    
    model.train()
    for epoch in range(300):
        # Full-batch step: the whole training set goes through the model at once.
        output = model(train_x)
        loss = loss_fun(output, train_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report progress every 20 epochs, skipping epoch 0.
        if epoch % 20 == 0 and epoch > 0:
            # The held-out loss is only reported, never back-propagated, so
            # there is no need to record an autograd graph for it.
            with torch.no_grad():
                test_loss = loss_fun(model(test_x), test_y)
            # .item() extracts plain floats so the log shows numbers rather than
            # tensor reprs such as "tensor(..., grad_fn=...)".
            print("epoch:{}, loss:{}, test_loss: {}".format(epoch, loss.item(), test_loss.item()))
    
    model.eval()
    # Inference only: disable autograd, undo the /1000 scaling, and convert the
    # predictions to plain Python floats on the CPU so matplotlib can plot them
    # even when the model runs on a GPU.
    with torch.no_grad():
        train_pred = (model(train_x).reshape(-1) * 1000).cpu().tolist()
        test_pred = (model(test_x).reshape(-1) * 1000).cpu().tolist()
    # The first prediction is for index `seq` (the first `seq` values only ever
    # serve as input), so exactly `seq` real values are prepended to keep the
    # predicted curve aligned with `value` and of the same length.
    result = value[:seq] + train_pred + test_pred
    # Actual series.
    plt.plot(value, label="real")
    # Model predictions (training part followed by the test part).
    plt.plot(result, label="pred")
    plt.legend(loc='best')
    # NOTE(review): outside a notebook, a plt.show() call is needed to display the figure.
    

    相关文章

      网友评论

          本文标题:Pytorch LSTM预测趋势

          本文链接:https://www.haomeiwen.com/subject/majmyctx.html