美文网首页
金融时间序列处理

金融时间序列处理

作者: 哈劳斯军士 | 来源:发表于2017-06-27 15:01 被阅读156次

    太忙,好久不写笔记了。这两天有空,把该整理的好好整理一下。

    一、datetime库

    from datetime import datetime
    now = datetime.now()
    print(now)
    print(type(now))
    

    2017-06-07 23:13:03.505630
    <class 'datetime.datetime'>

    diff = datetime(2017, 3, 4, 17) - datetime(2017, 2, 18, 15)
    print(type(diff))
    print(diff)
    print('经历了{}天, {}秒。'.format(diff.days, diff.seconds))
    

    <class 'datetime.timedelta'>
    14 days, 2:00:00
    经历了14天, 7200秒。

    str >> datetime

    # strptime
    dt_str = '2017-02-18'
    dt_obj2 = datetime.strptime(dt_str, '%Y-%m-%d')
    print(type(dt_obj2))
    print(dt_obj2)
    

    <class 'datetime.datetime'>
    2017-02-18 00:00:00

    时间戳解析的用法:

    # dateutil.parser.parse
    from dateutil.parser import parse
    dt_str2 = '18-02-2017'
    dt_obj3 = parse(dt_str2)
    print(type(dt_obj3))
    print(dt_obj3)
    

    <class 'datetime.datetime'>
    2017-02-18 00:00:00

    pandas的时间戳:

    # pd.to_datetime
    import pandas as pd
    s_obj = pd.Series(['2017/02/18', '2017/02/19', '2017-02-25', '2017-02-26'], name='course_time')
    print(s_obj)
    

    0 2017/02/18
    1 2017/02/19
    2 2017-02-25
    3 2017-02-26
    Name: course_time, dtype: object

    s_obj2 = pd.to_datetime(s_obj)
    print(s_obj2)
    

    0 2017-02-18
    1 2017-02-19
    2 2017-02-25
    3 2017-02-26
    Name: course_time, dtype: datetime64[ns]

    # 处理缺失值
    s_obj3 = pd.Series(['2017/02/18', '2017/02/19', '2017-02-25', '2017-02-26'] + [None], 
                       name='course_time')
    print(s_obj3)
    

    0 2017/02/18
    1 2017/02/19
    2 2017-02-25
    3 2017-02-26
    4 None
    Name: course_time, dtype: object

    s_obj4 = pd.to_datetime(s_obj3)
    print(s_obj4) # NaT -> Not a Time
    

    0 2017-02-18
    1 2017-02-19
    2 2017-02-25
    3 2017-02-26
    4 NaT
    Name: course_time, dtype: datetime64[ns]

    datetime常用格式(用于strptime/strftime):%Y 四位年份,%m 两位月份,%d 两位日期,%H 小时(24小时制),%M 分钟,%S 秒,例如 '%Y-%m-%d %H:%M:%S'。

    二、Pandas时间序列

    创建

    from datetime import datetime
    import pandas as pd
    import numpy as np
    
    # 指定index为datetime的list
    date_list = [datetime(2017, 2, 18), datetime(2017, 2, 19), 
                 datetime(2017, 2, 25), datetime(2017, 2, 26), 
                 datetime(2017, 3, 4), datetime(2017, 3, 5)]
    time_s = pd.Series(np.random.randn(6), index=date_list)
    print(time_s)
    print(type(time_s.index))
    

    2017-02-18 -0.230989
    2017-02-19 -0.398082
    2017-02-25 -0.309926
    2017-02-26 -0.179672
    2017-03-04 0.942698
    2017-03-05 1.053092
    dtype: float64
    <class 'pandas.core.indexes.datetimes.DatetimeIndex'>

    # pd.date_range()
    dates = pd.date_range('2017-02-18', # 起始日期
                          periods=5,    # 生成的时间点个数
                          freq='W-SAT') # 频率
    print(dates)
    print(pd.Series(np.random.randn(5), index=dates))
    #从2017-02-18开始每周六的数据,连续五周
    

    DatetimeIndex(['2017-02-18', '2017-02-25', '2017-03-04', '2017-03-11',
    '2017-03-18'],
    dtype='datetime64[ns]', freq='W-SAT')
    2017-02-18 -1.680280
    2017-02-25 0.908664
    2017-03-04 0.145318
    2017-03-11 -2.940363
    2017-03-18 0.152681
    Freq: W-SAT, dtype: float64

    索引
    太简单就不抄文档了。

    过滤

    time_s
    

    2017-02-18 -0.230989
    2017-02-19 -0.398082
    2017-02-25 -0.309926
    2017-02-26 -0.179672
    2017-03-04 0.942698
    2017-03-05 1.053092
    dtype: float64

    time_s.truncate(before='2017-2-25')
    

    2017-02-25 -0.309926
    2017-02-26 -0.179672
    2017-03-04 0.942698
    2017-03-05 1.053092
    dtype: float64

    time_s.truncate(after='2017-2-25')
    

    2017-02-18 -0.230989
    2017-02-19 -0.398082
    2017-02-25 -0.309926
    dtype: float64

    生成日期范围

    # 传入开始、结束日期,默认生成的该时间段的时间点是按天计算的
    date_index = pd.date_range('2017/02/18', '2017/03/18')
    print(date_index)
    

    DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
    '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
    '2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',
    '2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',
    '2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',
    '2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
    '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
    '2017-03-18'],
    dtype='datetime64[ns]', freq='D')

    # 只传入开始或结束日期时,还需要通过periods指定要生成的时间点个数
    print(pd.date_range(start='2017/02/18', periods=10, freq='4D'))
    

    DatetimeIndex(['2017-02-18', '2017-02-22', '2017-02-26', '2017-03-02',
    '2017-03-06', '2017-03-10', '2017-03-14', '2017-03-18',
    '2017-03-22', '2017-03-26'],
    dtype='datetime64[ns]', freq='4D')

    print(pd.date_range(end='2017/03/18', periods=10))
    

    DatetimeIndex(['2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',
    '2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',
    '2017-03-17', '2017-03-18'],
    dtype='datetime64[ns]', freq='D')

    # 规范化时间戳 
    print(pd.date_range(start='2017/02/18 12:13:14', periods=10))
    print(pd.date_range(start='2017/02/18 12:13:14', periods=10, normalize=True))
    

    DatetimeIndex(['2017-02-18 12:13:14', '2017-02-19 12:13:14',
    '2017-02-20 12:13:14', '2017-02-21 12:13:14',
    '2017-02-22 12:13:14', '2017-02-23 12:13:14',
    '2017-02-24 12:13:14', '2017-02-25 12:13:14',
    '2017-02-26 12:13:14', '2017-02-27 12:13:14'],
    dtype='datetime64[ns]', freq='D')
    DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
    '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
    '2017-02-26', '2017-02-27'],
    dtype='datetime64[ns]', freq='D')

    频率与偏移量

    print(pd.date_range('2017/02/18', '2017/03/18', freq='2D'))
    

    DatetimeIndex(['2017-02-18', '2017-02-20', '2017-02-22', '2017-02-24',
    '2017-02-26', '2017-02-28', '2017-03-02', '2017-03-04',
    '2017-03-06', '2017-03-08', '2017-03-10', '2017-03-12',
    '2017-03-14', '2017-03-16', '2017-03-18'],
    dtype='datetime64[ns]', freq='2D')

    # 偏移量通过加法连接
    sum_offset = pd.tseries.offsets.Week(2) + pd.tseries.offsets.Hour(12)
    print(sum_offset)
    
    print(pd.date_range('2017/02/18', '2017/03/18', freq=sum_offset))
    

    14 days 12:00:00
    DatetimeIndex(['2017-02-18 00:00:00', '2017-03-04 12:00:00'], dtype='datetime64[ns]', freq='348H')

    移动数据

    ts = pd.Series(np.random.randn(5), index=pd.date_range('20170218', periods=5, freq='W-SAT'))
    print(ts)

    2017-02-18 -0.208622
    2017-02-25 0.616093
    2017-03-04 -0.424725
    2017-03-11 -0.361475
    2017-03-18 0.761274
    Freq: W-SAT, dtype: float64

    print(ts.shift(1))
    #print(ts.shift(-1))
    

    2017-02-18 NaN
    2017-02-25 -0.208622
    2017-03-04 0.616093
    2017-03-11 -0.424725
    2017-03-18 -0.361475
    Freq: W-SAT, dtype: float64

    三、重采样

    import pandas as pd
    import numpy as np
    
    date_rng = pd.date_range('20170101', periods=100, freq='D')
    ser_obj = pd.Series(range(len(date_rng)), index=date_rng)
    print(ser_obj.head(10))
    

    2017-01-01 0
    2017-01-02 1
    2017-01-03 2
    2017-01-04 3
    2017-01-05 4
    2017-01-06 5
    2017-01-07 6
    2017-01-08 7
    2017-01-09 8
    2017-01-10 9
    Freq: D, dtype: int32

    # 统计每个月的数据总和
    resample_month_sum = ser_obj.resample('M').sum()
    # 统计每个月的数据平均
    resample_month_mean = ser_obj.resample('M').mean()
    
    print('按月求和:', resample_month_sum)
    print('按月求均值:', resample_month_mean)
    

    按月求和: 2017-01-31 465
    2017-02-28 1246
    2017-03-31 2294
    2017-04-30 945
    Freq: M, dtype: int32
    按月求均值: 2017-01-31 15.0
    2017-02-28 44.5
    2017-03-31 74.0
    2017-04-30 94.5
    Freq: M, dtype: float64

    降采样

    five_day_sum_sample = ser_obj.resample('5D').sum()
    five_day_mean_sample = ser_obj.resample('5D').mean()
    five_day_ohlc_sample = ser_obj.resample('5D').ohlc()
    
    print('降采样,sum')
    print(five_day_sum_sample)
    

    降采样,sum
    2017-01-01 10
    2017-01-06 35
    2017-01-11 60
    2017-01-16 85
    2017-01-21 110
    2017-01-26 135
    2017-01-31 160
    2017-02-05 185
    2017-02-10 210
    2017-02-15 235
    2017-02-20 260
    2017-02-25 285
    2017-03-02 310
    2017-03-07 335
    2017-03-12 360
    2017-03-17 385
    2017-03-22 410
    2017-03-27 435
    2017-04-01 460
    2017-04-06 485
    Freq: 5D, dtype: int32

    相关文章

      网友评论

          本文标题:金融时间序列处理

          本文链接:https://www.haomeiwen.com/subject/aipscxtx.html