表的连接与分组操作
import pandas as pd
from pandas import DataFrame
# Two small frames whose 'name' columns overlap only on 'ZhangFei' and
# 'GuanYu'; the remaining names differ in case and therefore never match.
df1 = DataFrame({
    'name': ['ZhangFei', 'GuanYu', 'a', 'b', 'c'],
    'data1': range(1, 6),
})
df2 = DataFrame({
    'name': ['ZhangFei', 'GuanYu', 'A', 'B', 'C'],
    'data2': range(1, 6),
})
print(df1, df2, sep='\n')

# Join the two frames on the 'name' column. No `how` is given, so this is the
# default inner join: only names present in both frames are kept.
df3 = df1.merge(df2, on='name')
print(df3)
name data1
0 ZhangFei 1
1 GuanYu 2
2 a 3
3 b 4
4 c 5
name data2
0 ZhangFei 1
1 GuanYu 2
2 A 3
3 B 4
4 C 5
name data1 data2
0 ZhangFei 1 1
1 GuanYu 2 2
# Inner join: no `on` is given, so the frames are joined on the columns they
# have in common; only rows whose key exists in both frames survive.
df3 = df1.merge(df2, how='inner')
print(df3)
name data1 data2
0 ZhangFei 1 1
1 GuanYu 2 2
# Left join: every row of df1 is kept; where df2 has no matching key the
# df2 columns are filled with NaN.
df3 = df1.merge(df2, how='left')
print(df3)
name data1 data2
0 ZhangFei 1 1.0
1 GuanYu 2 2.0
2 a 3 NaN
3 b 4 NaN
4 c 5 NaN
# Right join: every row of df2 is kept; where df1 has no matching key the
# df1 columns are filled with NaN.
df3 = df1.merge(df2, how='right')
print(df3)
name data1 data2
0 ZhangFei 1.0 1
1 GuanYu 2.0 2
2 A NaN 3
3 B NaN 4
4 C NaN 5
# Outer join: the union of keys from both frames is kept; whichever side has
# no matching row contributes NaN.
df3 = df1.merge(df2, how='outer')
print(df3)
name data1 data2
0 ZhangFei 1.0 1.0
1 GuanYu 2.0 2.0
2 a 3.0 NaN
3 b 4.0 NaN
4 c 5.0 NaN
5 A NaN 3.0
6 B NaN 4.0
7 C NaN 5.0
import pandas as pd
import numpy as np
# The CSV contains Chinese text, so it is read with the GBK encoding.
# A raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as '\D' or '\h' being passed through.
data = pd.read_csv(r'E:\Data_Analysis_with_Python\L2\dataframe\heros2.csv', encoding='gbk')
print(data)

# Aggregate only the numeric columns: a text column such as 'name' cannot be
# averaged, and pandas 2.x raises on it instead of silently dropping it.
score_columns = [
    column
    for column in data.select_dtypes(include='number').columns
    if column != 'role'
]
# String aggregator names avoid the deprecation warning pandas emits when
# NumPy callables (np.sum / np.mean) are handed to agg().
result2 = data.groupby('role')[score_columns].agg(['sum', 'mean'])

# Iterating a GroupBy yields (group key, sub-frame) tuples; print each one.
result1 = data.groupby('role')
for group in result1:
    print(group)

print(result2)
name role Chinese Maths English
0 张飞 坦克 66 30 65
1 关羽 战士 95 98 85
2 刘备 战士 93 96 92
3 典韦 战士 90 77 88
4 许褚 坦克 80 90 90
('坦克', name role Chinese Maths English
0 张飞 坦克 66 30 65
4 许褚 坦克 80 90 90)
('战士', name role Chinese Maths English
1 关羽 战士 95 98 85
2 刘备 战士 93 96 92
3 典韦 战士 90 77 88)
Chinese Maths English
sum mean sum mean sum mean
role
坦克 146 73.000000 120 60.000000 155 77.500000
战士 278 92.666667 271 90.333333 265 88.333333
网友评论