美文网首页
Pandas04:表操作

Pandas04:表操作

作者: 罗泽坤 | 来源:发表于2020-03-28 21:09 被阅读0次

    表的连接与分组操作

    import pandas as pd
    from pandas import DataFrame
    
    # Two small frames whose 'name' columns overlap only on 'ZhangFei' and 'GuanYu'.
    df1 = DataFrame({
        'name': ['ZhangFei', 'GuanYu', 'a', 'b', 'c'],
        'data1': range(1, 6),
    })
    df2 = DataFrame({
        'name': ['ZhangFei', 'GuanYu', 'A', 'B', 'C'],
        'data2': range(1, 6),
    })
    print(df1)
    print(df2)
    # Join on the shared 'name' column. No `how` is given, so this is the
    # default inner join: only names present in both frames are kept.
    df3 = df1.merge(df2, on='name')
    print(df3)
    
           name  data1
    0  ZhangFei      1
    1    GuanYu      2
    2         a      3
    3         b      4
    4         c      5
           name  data2
    0  ZhangFei      1
    1    GuanYu      2
    2         A      3
    3         B      4
    4         C      5
           name  data1  data2
    0  ZhangFei      1      1
    1    GuanYu      2      2
    
    # Inner join: with no `on`, the merge key is the column both frames
    # share ('name'); rows without a match on either side are dropped.
    df3 = df1.merge(df2, how='inner')
    print(df3)
    
           name  data1  data2
    0  ZhangFei      1      1
    1    GuanYu      2      2
    
    # Left join: every row of df1 is kept; 'data2' is NaN where df2 has no
    # matching name.
    df3 = df1.merge(df2, how='left')
    print(df3)
    
           name  data1  data2
    0  ZhangFei      1    1.0
    1    GuanYu      2    2.0
    2         a      3    NaN
    3         b      4    NaN
    4         c      5    NaN
    
    # Right join: every row of df2 is kept; 'data1' is NaN where df1 has no
    # matching name.
    df3 = df1.merge(df2, how='right')
    print(df3)
    
           name  data1  data2
    0  ZhangFei    1.0      1
    1    GuanYu    2.0      2
    2         A    NaN      3
    3         B    NaN      4
    4         C    NaN      5
    
    # Outer join: the union of names from both frames; whichever side has
    # no match is filled with NaN.
    df3 = df1.merge(df2, how='outer')
    print(df3)
    
           name  data1  data2
    0  ZhangFei    1.0    1.0
    1    GuanYu    2.0    2.0
    2         a    3.0    NaN
    3         b    4.0    NaN
    4         c    5.0    NaN
    5         A    NaN    3.0
    6         B    NaN    4.0
    7         C    NaN    5.0
    
    import pandas as pd
    import numpy as np
    # The CSV contains Chinese text, so it is read with the GBK encoding.
    # The Windows path is a raw string so its backslashes are never parsed
    # as escape sequences (e.g. '\D' and '\h' are invalid escapes).
    data = pd.read_csv(r'E:\Data_Analysis_with_Python\L2\dataframe\heros2.csv', encoding='gbk')
    print(data)
    
    # Iterating a GroupBy yields one (group key, sub-frame) tuple per role.
    result1 = data.groupby('role')
    for group in result1:
        print(group)
    
    # Aggregate only the numeric columns: taking the mean of the string
    # 'name' column raises TypeError on pandas >= 2.0 instead of being
    # silently skipped. String aggregator names replace np.sum / np.mean,
    # which are deprecated as arguments to agg().
    numeric_columns = list(data.select_dtypes(include='number').columns)
    result2 = result1[numeric_columns].agg(['sum', 'mean'])
    # To save the summary table: result2.to_csv('heros2.csv')
    print(result2)
    
      name role  Chinese  Maths  English
    0   张飞   坦克       66     30       65
    1   关羽   战士       95     98       85
    2   刘备   战士       93     96       92
    3   典韦   战士       90     77       88
    4   许褚   坦克       80     90       90
    ('坦克',   name role  Chinese  Maths  English
    0   张飞   坦克       66     30       65
    4   许褚   坦克       80     90       90)
    ('战士',   name role  Chinese  Maths  English
    1   关羽   战士       95     98       85
    2   刘备   战士       93     96       92
    3   典韦   战士       90     77       88)
         Chinese            Maths            English           
             sum       mean   sum       mean     sum       mean
    role                                                       
    坦克       146  73.000000   120  60.000000     155  77.500000
    战士       278  92.666667   271  90.333333     265  88.333333

    相关文章

      网友评论

          本文标题:Pandas04:表操作

          本文链接:https://www.haomeiwen.com/subject/adxeuhtx.html