#coding=utf-8
import pandas as pd
df1=pd.DataFrame({"lkey":["foo","bar","baz","lemon"],
"values":[1,2,3,5]}
)
df2=pd.DataFrame({"rkey":["foo","bar","baz","jack"],
"values":[5,6,7,8]}
)
print(df1,df2)
'''
lkey values
0 foo 1
1 bar 2
2 baz 3
3 lemon 5
rkey values
0 foo 5
1 bar 6
2 baz 7
3 jack 8
'''
#inner内连接
print(pd.merge(df1,df2,left_on="lkey",right_on="rkey",how="inner"))
'''
解释:
how="inner"指定两个df为内连接;
left_on="lkey",right_on="rkey" 指定两个df以lkey和rkey两列的值作为等值条件(类似sql中on t1.lkey=t2.rkey)
lkey values_x rkey values_y
0 foo 1 foo 5
1 bar 2 bar 6
2 baz 3 baz 7
'''
#inner内连接,修改关联值的列名的后缀名
print(pd.merge(df1,df2,left_on="lkey",right_on="rkey",how="inner",suffixes=("_leky","_rkey")))
'''
解释:
how="inner"指定两个df为内连接;
left_on="lkey",right_on="rkey" 指定两个df以lkey和rkey两列的值作为等值条件(类似sql中on t1.lkey=t2.rkey)
lkey values_leky rkey values_rkey
0 foo 1 foo 5
1 bar 2 bar 6
2 baz 3 baz 7
'''
#left左连接:产生表1的完全集,而表2中匹配的则有值,没有匹配到置为Null
print(pd.merge(df1,df2,left_on="lkey",right_on="rkey",how="left",suffixes=("_leky","_rkey")))
'''
解释:左连接的结果类似sql中left_join的结果
lkey values_leky rkey values_rkey
0 foo 1 foo 5.0
1 bar 2 bar 6.0
2 baz 3 baz 7.0
3 lemon 5 NaN NaN
'''
#right左连接:产生表2的完全集,而表1中匹配的则有值,没有匹配到置为Null
print(pd.merge(df1,df2,left_on="lkey",right_on="rkey",how="right",suffixes=("_leky","_rkey")))
'''
解释:右连接的结果类似sql中right_join的结果
lkey values_leky rkey values_rkey
0 foo 1.0 foo 5
1 bar 2.0 bar 6
2 baz 3.0 baz 7
3 NaN NaN jack 8
'''
#outer全连接
print(pd.merge(df1,df2,left_on="lkey",right_on="rkey",how="outer",suffixes=("_leky","_rkey")))
'''
解释:两表相同的列展示出value,不相同的列各自取null
lkey values_leky rkey values_rkey
0 foo 1.0 foo 5.0
1 bar 2.0 bar 6.0
2 baz 3.0 baz 7.0
3 lemon 5.0 NaN NaN
4 NaN NaN jack 8.0
'''
网友评论