美文网首页
matplotlib日期坐标轴及dataframe的merge操

matplotlib日期坐标轴及dataframe的merge操

作者: dgatiger | 来源:发表于2020-03-18 00:40 被阅读0次

    1.主要问题

    主要记录dataframe构造,非连续时间做X轴的处理以及pandas中dataframe的连接操作。

    2.Demo代码

    # coding:utf8
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import datetime
    import matplotlib.dates as mdates
    
    
    # 方法1: 默认index是X轴 达到预期
    def p1():
        ax = fig.add_subplot(311)
        ax.plot(df_days['data'], label='data')  # 默认以index作为x轴
    
        for a, b in zip(df_days.index.to_list(), df_days['data']):
            ax.text(a, b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom', color='blue')
    
        # 画X轴刻度及label
        # 方法1:用plt.xticks
        # xticks两个参数只在plt有,同时设置刻度跟标签
        plt.xticks(df_days.index.to_list(), xlabels, rotation=90)
        # ax.xticks(df_days.index.to_list(), xlabels, rotation=90)
        # #AttributeError: 'AxesSubplot' object has no attribute 'xticks'
    
        # 画X轴刻度及label
        # 方法2:用ax.set_xticks及ax.set_xticklabels
        # ax的ticks跟tickslabel要分开设置!!!
        # ax.set_xticks(df_days.index.to_list())  # Index作刻度
        # ax.set_xticklabels(xlabels, rotation=30)
    
        ax.grid()
        ax.legend()
    
    
    # 方法3:用非index做X轴,若是日期型,或者mdate.date2num转数字为x轴tick,去掉的周末会留空,非预期
    def p2():
        ax = fig.add_subplot(312)
        # plt.plot(df_days["trd_date"], df_days['data'], label='data')
        # 等价于,日期即转不转数字,内部都是以数字处理,即当前日期与"0000-01-01 00:00:00"相减的天数
        plt.plot(df_days["trd_date"].map(lambda x: mdates.date2num(x)), df_days['data'], label='data')
    
        for a, b in zip(df_days["trd_date"], df_days['data']):
            ax.text(mdates.date2num(a), b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom')
    
        plt.xticks(df_days["trd_date"], xlabels, rotation=45)
        ax.grid()
        ax.legend()
    
    
    # 方法3:用非Index做X轴,且非日期型 达到预期
    def p3():
        ax = fig.add_subplot(313)
        ax.plot(xlabels, df_days['data'], label='data')
    
        for a, b in zip(df_days.index.to_list(), df_days['data']):
            ax.text(a, b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom')
    
        ax.set_xticklabels(xlabels, rotation=35)
        ax.grid()
        ax.legend()
    
    
    def get_df_without_56(days=29):
        # 起始日期
        start_date = datetime.date.today() - datetime.timedelta(days=days)
        # 日期序列
        day_list = pd.date_range(start_date, periods=days, freq='D')
        # 序列里直接过滤掉周末
        day_list = day_list[day_list.weekday < 5]
        # 生产随机Demo数据
        data_list = np.random.rand(days - int(days / 7) * 2) * 100
    
        # 生成dataframe方法1:两步走
        df_days = pd.DataFrame(data=day_list, columns=['trd_date'])
        df_days['data'] = data_list  # 增加数据列
        return df_days
    
    
    def get_df_without_01(days=29):
        # 起始日期
        start_date = datetime.date.today() - datetime.timedelta(days=days)
        # 日期序列
        day_list = pd.date_range(start_date, periods=days, freq='D')
        # 序列里直接过滤周一周二
        day_list = day_list[day_list.weekday > 1]
        # 生产随机Demo数据
        data_list = np.random.rand(days - int(days / 7) * 2) * 100
    
        # 生成dataframe方法2:zip后一步到位
        df_days = pd.DataFrame(list(zip(day_list, data_list)), columns=['trd_date', 'data'])
        return df_days
    
    
    def df_merge_demo(left, right):
        # 默认以重叠的列名当做连接键,本例中等同于pd.merge(left,right,on=['trd_date','data'],how='inner')
        defaultmerge = pd.merge(left, right)
        print("\n测试:defaultmerge=pd.merge(left, right)")
        print(defaultmerge.head(100))
    
        # 等同于pd.concat([left,right]).reset_index(drop=True)
        defaultouter = pd.merge(left, right, how='outer')
        print("\n测试:defaultouter = pd.merge(left, right, how='outer')")
        print(defaultouter.head(100))
        pdconcat = pd.concat([left, right]).reset_index(drop=True)
        print("\n测试:pdconcat= pd.concat([left, right]).reset_index(drop=True)")
        print(pdconcat)
    
        # 如果两个对象的key列名不同,可以分别指定,例:pd.merge(left,right,left_on='lkey',right_on='rkey')
        leftjoin = pd.merge(left, right, on='trd_date', how='left')
        print("\n测试:leftjoin = pd.merge(left, right, on='trd_date', how='left')")
        print(leftjoin.head(100))
        # 多键连接时将连接键组成列表传入,例:pd.merge(left,right,on=['key1','key2']
        rightjoin = pd.merge(left, right, on='trd_date', how='right')
        print("\n测试:rightjoin = pd.merge(left, right, on='trd_date', how='right')")
        print(rightjoin.head(100))
    
        innerjoin = pd.merge(left, right, on='trd_date', how='inner')  # 默认为inner
        print("\n测试:innerjoin = pd.merge(left, right, on='trd_date', how='inner')")
        print(innerjoin.head(100))
    
        outerjoin = pd.merge(left, right, on='trd_date', how='outer')
        print("\n测试:innerjoin = pd.merge(left, right, on='trd_date', how='outer')")
        print(outerjoin.head(100))
    
        left = left.set_index('trd_date')  # 设置索引
        right = right.set_index('trd_date') # 设置索引
        # join连接后重建索引
        dfjoin = left.join(right, lsuffix='_l', rsuffix='_r',sort=False).reset_index()
        print("\n测试:dfjoin = left.join(right).reset_index()")
        print(dfjoin.head(100))
        dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名
        print("\n测试:dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名")
        print(dfjoin.head(100))
    
    
    if __name__ == '__main__':
        df_days = get_df_without_56(15)
        df_days_01 = get_df_without_01(15)
    
        # dataframe merge 测试
        df_merge_demo(df_days, df_days_01)
    
        fig = plt.figure(figsize=(20, 10), dpi=80)
        # 生成x轴的label,方法1: map然后lambda表达式
        xlabels = df_days["trd_date"].map(lambda x: x.strftime('%m-%d'))
        # 或者 方法2: for in 循环生成数组
        # xlabels = [date.strftime('%m-%d') for (date) in df_days["trd_date"]]
    
        p1()
        p2()
        p3()
    
        plt.show()
        plt.close()
    

    3.执行结果

    测试:defaultmerge=pd.merge(left, right)
    Empty DataFrame
    Columns: [trd_date, data]
    Index: []
    测试:defaultouter = pd.merge(left, right, how='outer')
         trd_date       data
    0  2020-02-20  28.519733
    1  2020-02-21  21.957296
    2  2020-02-24  81.709042
    3  2020-02-25  49.141968
    4  2020-02-26  75.962113
    5  2020-02-27  34.297047
    6  2020-02-28   8.599335
    7  2020-03-02  59.895028
    8  2020-03-03  36.150423
    9  2020-03-04  41.961246
    10 2020-03-05  78.180868
    11 2020-02-20  81.522147
    12 2020-02-21  66.379312
    13 2020-02-22  68.038422
    14 2020-02-23  82.831858
    15 2020-02-26  44.112500
    16 2020-02-27   9.492410
    17 2020-02-28  53.929783
    18 2020-02-29  19.431358
    19 2020-03-01  59.103261
    20 2020-03-04  66.754922
    21 2020-03-05  66.497167
    测试:pdconcat= pd.concat([left, right]).reset_index(drop=True)
         trd_date       data
    0  2020-02-20  28.519733
    1  2020-02-21  21.957296
    2  2020-02-24  81.709042
    3  2020-02-25  49.141968
    4  2020-02-26  75.962113
    5  2020-02-27  34.297047
    6  2020-02-28   8.599335
    7  2020-03-02  59.895028
    8  2020-03-03  36.150423
    9  2020-03-04  41.961246
    10 2020-03-05  78.180868
    11 2020-02-20  81.522147
    12 2020-02-21  66.379312
    13 2020-02-22  68.038422
    14 2020-02-23  82.831858
    15 2020-02-26  44.112500
    16 2020-02-27   9.492410
    17 2020-02-28  53.929783
    18 2020-02-29  19.431358
    19 2020-03-01  59.103261
    20 2020-03-04  66.754922
    21 2020-03-05  66.497167
    测试:leftjoin = pd.merge(left, right, on='trd_date', how='left')
         trd_date     data_x     data_y
    0  2020-02-20  28.519733  81.522147
    1  2020-02-21  21.957296  66.379312
    2  2020-02-24  81.709042        NaN
    3  2020-02-25  49.141968        NaN
    4  2020-02-26  75.962113  44.112500
    5  2020-02-27  34.297047   9.492410
    6  2020-02-28   8.599335  53.929783
    7  2020-03-02  59.895028        NaN
    8  2020-03-03  36.150423        NaN
    9  2020-03-04  41.961246  66.754922
    10 2020-03-05  78.180868  66.497167
    测试:rightjoin = pd.merge(left, right, on='trd_date', how='right')
         trd_date     data_x     data_y
    0  2020-02-20  28.519733  81.522147
    1  2020-02-21  21.957296  66.379312
    2  2020-02-26  75.962113  44.112500
    3  2020-02-27  34.297047   9.492410
    4  2020-02-28   8.599335  53.929783
    5  2020-03-04  41.961246  66.754922
    6  2020-03-05  78.180868  66.497167
    7  2020-02-22        NaN  68.038422
    8  2020-02-23        NaN  82.831858
    9  2020-02-29        NaN  19.431358
    10 2020-03-01        NaN  59.103261
    测试:innerjoin = pd.merge(left, right, on='trd_date', how='inner')
        trd_date     data_x     data_y
    0 2020-02-20  28.519733  81.522147
    1 2020-02-21  21.957296  66.379312
    2 2020-02-26  75.962113  44.112500
    3 2020-02-27  34.297047   9.492410
    4 2020-02-28   8.599335  53.929783
    5 2020-03-04  41.961246  66.754922
    6 2020-03-05  78.180868  66.497167
    测试:innerjoin = pd.merge(left, right, on='trd_date', how='outer')
         trd_date     data_x     data_y
    0  2020-02-20  28.519733  81.522147
    1  2020-02-21  21.957296  66.379312
    2  2020-02-24  81.709042        NaN
    3  2020-02-25  49.141968        NaN
    4  2020-02-26  75.962113  44.112500
    5  2020-02-27  34.297047   9.492410
    6  2020-02-28   8.599335  53.929783
    7  2020-03-02  59.895028        NaN
    8  2020-03-03  36.150423        NaN
    9  2020-03-04  41.961246  66.754922
    10 2020-03-05  78.180868  66.497167
    11 2020-02-22        NaN  68.038422
    12 2020-02-23        NaN  82.831858
    13 2020-02-29        NaN  19.431358
    14 2020-03-01        NaN  59.103261
    测试:dfjoin = left.join(right).reset_index()
         trd_date     data_l     data_r
    0  2020-02-20  28.519733  81.522147
    1  2020-02-21  21.957296  66.379312
    2  2020-02-24  81.709042        NaN
    3  2020-02-25  49.141968        NaN
    4  2020-02-26  75.962113  44.112500
    5  2020-02-27  34.297047   9.492410
    6  2020-02-28   8.599335  53.929783
    7  2020-03-02  59.895028        NaN
    8  2020-03-03  36.150423        NaN
    9  2020-03-04  41.961246  66.754922
    10 2020-03-05  78.180868  66.497167
    测试:dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名
             交易日期     data_l     data_r
    0  2020-02-20  28.519733  81.522147
    1  2020-02-21  21.957296  66.379312
    2  2020-02-24  81.709042        NaN
    3  2020-02-25  49.141968        NaN
    4  2020-02-26  75.962113  44.112500
    5  2020-02-27  34.297047   9.492410
    6  2020-02-28   8.599335  53.929783
    7  2020-03-02  59.895028        NaN
    8  2020-03-03  36.150423        NaN
    9  2020-03-04  41.961246  66.754922
    10 2020-03-05  78.180868  66.497167
    

    相关文章

      网友评论

          本文标题:matplotlib日期坐标轴及dataframe的merge操

          本文链接:https://www.haomeiwen.com/subject/acufyhtx.html