美文网首页
时间序列分析_co2的预测分析

时间序列分析_co2的预测分析

作者: a_big_cat | 来源:发表于2021-03-28 11:07 被阅读0次

    分析目的:

    根据时间序列反映出来发展过程和趋势,建立ARIMA模型,预测下一段时间可能达到的水平。

    字段说明

    date:时间
    co2: 二氧化碳

    1.导入数据

    import pandas as pd
    import numpy as np
    import statsmodels.api as sm
    import matplotlib.pyplot as plt
    plt.style.use('fivethirtyeight')
    data = sm.datasets.co2.load_pandas()
    Mauna_Loa_CO2 =pd.read_csv('./Mauna_Loa_CO2 .csv')
    Mauna_Loa_CO2.head()
    
    date co2
    0 1958/3/29 316.1
    1 1958/4/5 317.3
    2 1958/4/12 317.6
    3 1958/4/19 317.5
    4 1958/4/26 316.4

    2.数据探索

    
    
    Mauna_Loa_CO2.info()
    
    
    
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 2284 entries, 0 to 2283
    Data columns (total 2 columns):
     #   Column  Non-Null Count  Dtype  
    ---  ------  --------------  -----  
     0   date    2284 non-null   object 
     1   co2     2225 non-null   float64
    dtypes: float64(1), object(1)
    memory usage: 35.8+ KB
    
    
    
    
    
    Mauna_Loa_CO2.isnull().any()
    
    
    
    date    False
    co2      True
    dtype: bool
    
    
    
    Mauna_Loa_CO2_Miss=Mauna_Loa_CO2.fillna(method='ffill')
    Mauna_Loa_CO2_Miss.info()
    
    
    
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 2284 entries, 0 to 2283
    Data columns (total 2 columns):
     #   Column  Non-Null Count  Dtype  
    ---  ------  --------------  -----  
     0   date    2284 non-null   object 
     1   co2     2284 non-null   float64
    dtypes: float64(1), object(1)
    memory usage: 35.8+ KB
    
    
    
    Mauna_Loa_CO2_Miss.isnull().any()
    
    
    
    date    False
    co2     False
    dtype: bool
    
    
    
    import pandas as pd
    Mauna_Loa_CO2_Miss['date']=pd.to_datetime(Mauna_Loa_CO2_Miss['date'])
    Mauna_Loa_CO2_Miss.set_index('date',inplace=True)
    
    Mauna_Loa_CO2_Miss_Mon=Mauna_Loa_CO2_Miss['co2'].resample('MS').mean()
    Mauna_Loa_CO2_Miss_Mon2=pd.DataFrame(Mauna_Loa_CO2_Miss_Mon)
    Mauna_Loa_CO2_Miss_Mon2.head()
    
    
    
    co2 date
    1958-03-01 316.100
    1958-04-01 317.200
    1958-05-01 317.420
    1958-06-01 317.900
    1958-07-01 315.625
    
    
    import matplotlib.pyplot as plt
    Mauna_Loa_CO2_Miss.plot(figsize=(15, 6))
    plt.title("CO2 Trend") 
    plt.xlabel("Month") 
    plt.ylabel("CO2") 
    plt.show()
    
    
    
    output_14_0.png
    
    
    from statsmodels.tsa.seasonal import seasonal_decompose
    from pylab import rcParams
    rcParams['figure.figsize'] = 11, 9
    decomposition = sm.tsa.seasonal_decompose(Mauna_Loa_CO2_Miss_Mon2, model='additive')
    fig = decomposition.plot()
    plt.show()
    
    
    
    output_15_0.png

    3.序列平稳性检验

    
    
    get_ipython().run_line_magic('matplotlib', 'inline')
    import pandas as pd
    from statsmodels.tsa.stattools import adfuller
    import matplotlib.pyplot as plt
    def test_stationarity(timeseries):
        #计算均值与方差
        rolmean = timeseries.rolling(window=12).mean()
        rolstd = timeseries.rolling(window=12).std()
        #绘制图形:
        fig = plt.figure(figsize=(12, 8))
        orig = plt.plot(timeseries, color='blue',label='Original')
        mean = plt.plot(rolmean, color='red', label='Rolling Mean')
        std = plt.plot(rolstd, color='black', label = 'Rolling Std')
        plt.legend(loc='best')# 使用自动最佳的图例显示位置
        plt.title('Rolling Mean & Standard Deviation')
        plt.xlabel("Month") #添加X轴说明
        plt.ylabel("CO2") #添加Y轴说明
        plt.show()#观察是否平稳
        print('ADF检验结果:')
        #进行ADF检验
        print('Results of Dickey-Fuller Test:')
    # 使用减小AIC的办法估算ADF测试所需的滞后数
        dftest = adfuller(timeseries, autolag='AIC')
        # 将ADF测试结果、显著性概率、所用的滞后数和所用的观测数打印出来
        dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
        for key,value in dftest[4].items():
            dfoutput['Critical Value (%s)'%key] = value
        print(dfoutput)
    test_stationarity(Mauna_Loa_CO2_Miss_Mon2['co2'])
    
    
    
    output_17_0.png
    ADF检验结果:
    Results of Dickey-Fuller Test:
    Test Statistic                   2.140106
    p-value                          0.998829
    #Lags Used                      15.000000
    Number of Observations Used    510.000000
    Critical Value (1%)             -3.443237
    Critical Value (5%)             -2.867224
    Critical Value (10%)            -2.569797
    dtype: float64
    

    4.序列变换

    
    
    Mauna_Loa_CO2_Miss_Mon2['first_difference'] =Mauna_Loa_CO2_Miss_Mon2['co2'].diff(1)  
    test_stationarity(Mauna_Loa_CO2_Miss_Mon2['first_difference'].dropna(inplace=False))
    
    
    
    output_19_0.png
    ADF检验结果:
    Results of Dickey-Fuller Test:
    Test Statistic                  -4.824703
    p-value                          0.000049
    #Lags Used                      19.000000
    Number of Observations Used    505.000000
    Critical Value (1%)             -3.443366
    Critical Value (5%)             -2.867280
    Critical Value (10%)            -2.569827
    dtype: float64
    

    5.白噪音检验

    
    
    Mauna_Loa_CO2_Miss_Mon2['seasonal_difference'] = Mauna_Loa_CO2_Miss_Mon2['first_difference'].diff(12)  
    test_stationarity(Mauna_Loa_CO2_Miss_Mon2['seasonal_difference'].dropna(inplace=False))
    
    
    
    
    
    from statsmodels.stats.diagnostic import acorr_ljungbox 
    print(acorr_ljungbox(Mauna_Loa_CO2_Miss_Mon2['seasonal_difference'].iloc[13:], lags=1))
    
    
    

    6.确定参数

    
    
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(Mauna_Loa_CO2_Miss_Mon2['seasonal_difference'].iloc[13:], lags=40, ax=ax1) 
    #从13开始是因为做季节性差分时window是12
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(Mauna_Loa_CO2_Miss_Mon2['seasonal_difference'].iloc[13:], lags=40, ax=ax2)
    
    
    
    
    
    import warnings
    import itertools
    import numpy as np
    
    import pandas as pd
    p = d = q = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_pdq = [(x[0], x[1], x[2], 12) 
    for x in list(itertools.product(p, d, q))]
    warnings.filterwarnings("ignore") 
    # specify to ignore warning messages
    AIC_Value = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = sm.tsa.statespace.SARIMAX(Mauna_Loa_CO2_Miss_Mon2['co2'],
                                                order=param,
                                                seasonal_order=param_seasonal,
                                                enforce_stationarity=False,
                                                enforce_invertibility=False)
                results = mod.fit()
                AIC_Value.append(results.aic)
                print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
            except:
                continue
    
    
    

    7.模型结果

    
    
    mod = sm.tsa.statespace.SARIMAX(Mauna_Loa_CO2_Miss_Mon2['co2'],
                                    order=(1, 1, 1),
                                    seasonal_order=(1, 1, 1, 12),
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)
    results = mod.fit()
    print(results.summary().tables[1])
    
    
    
    
    
    results.plot_diagnostics(figsize=(15, 12))
    plt.show()
    
    
    
    
    
    pred = results.get_prediction(start=pd.to_datetime('1995-01-01'), dynamic=False)
    pred_ci = pred.conf_int()
    ax = Mauna_Loa_CO2_Miss_Mon2['1990':].plot(label='observed')
    ax.set_ylim(350, 380)
    pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7)
    ax.fill_between(pred_ci.index,
                    pred_ci.iloc[:, 0],
                    pred_ci.iloc[:, 1], color='r', alpha=.9)
    ax.set_xlabel('Date')
    ax.set_ylabel('CO2 Levels')
    plt.legend()
    plt.show()
    
    
    

    8.总结

    残差图的分布基本为标准正态分布,拟合残差基本为白噪音,其拟合效果比较好。

    相关文章

      网友评论

          本文标题:时间序列分析_co2的预测分析

          本文链接:https://www.haomeiwen.com/subject/mslphltx.html