Pandas7 pandas合并 merge
# concatenating 是比 merge 更简单的 DataFrame 合并方式
# merge 可以按索引(index)或键(key)对照进行合并
import pandas as pd
import numpy as np
# Two small frames sharing a 'key' column; the cell values ('A0', 'C0', ...)
# make it easy to see which rows were paired up by the merge.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# merge can align rows on key columns as well as on the index.
# Here 'key' is just an ordinary column.
print(left)
print(right)
print('---------------------------------')

# No `on` argument: pandas joins on the columns the two frames have in
# common, which here is only 'key'.
res1 = pd.merge(left, right)
print(res1)

# Which index or column should the merge be based on?
# Passing on='key' states it explicitly: merge on the 'key' column.
res2 = pd.merge(left, right, on='key')
print(res2)
print('---------------------------------')
# Consider two keys
import pandas as pd
import numpy as np
# Two frames that share a pair of key columns, 'key1' and 'key2'.
left1 = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right1 = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left1)
print(right1)

# Merging on two keys: the default how='inner' keeps only the
# (key1, key2) pairs that appear in both frames. For the frames above
# those pairs are (K0, K0) and (K1, K0).
# `how` may be 'left', 'right', 'outer' or 'inner' and selects which
# frame's keys are kept. 'outer' is used here so that unmatched rows from
# either side also show up in the result.
# indicator=True appends a '_merge' column that reports, per row, how it
# was matched: 'left_only', 'right_only' or 'both'.
res = pd.merge(
    left1, right1, on=['key1', 'key2'], how='outer', indicator=True
)
print(res)

# Passing a string instead of True gives the indicator column that name.
res = pd.merge(
    left1,
    right1,
    on=['key1', 'key2'],
    how='outer',
    indicator='indicater_column',
)
print(res)
# The last column is now labelled 'indicater_column' instead of '_merge'.
print('---------------------------------')
# left_index / right_index control whether the index of the left / right
# frame is used as the join key (not demonstrated in this section).
# This section shows how to handle overlapping column names in a merge.
boys = pd.DataFrame({
    'k': ['K0', 'K1', 'K2'],
    'age': [1, 2, 3],
})
girls = pd.DataFrame({
    'k': ['K0', 'K0', 'K3'],
    'age': [4, 5, 6],
})
print(boys)
print(girls)

# Both frames have an 'age' column, so after merging on 'k' the two would
# collide. `suffixes` appends a distinct suffix to each side's column.
print(
    pd.merge(boys, girls, on='k', suffixes=('_boy', '_girl'), how='inner')
)
# The output columns 'age_boy' and 'age_girl' show whose age is whose.
# join works much like merge, but merge is the more commonly used of the two.
运行部分结果:
image.png
image.png
网友评论