# 子查询
#coding=utf-8
import numpy as np
import pandas as pd
# Two small frames that have only the city "Chicago" in common; each is
# ranked 1..3.
df1 = pd.DataFrame(
    {
        "city": ["Chicago", "SanFrancisco", "Newyork"],
        "rank": range(1, 4),
    }
)
df2 = pd.DataFrame(
    {
        "city": ["Chicago", "Boston", "Angles"],
        "rank": range(1, 4),
    }
)
print(df1)
'''
city rank
0 Chicago 1
1 SanFrancisco 2
2 Newyork 3
'''
print(df2)
'''
city rank
0 Chicago 1
1 Boston 2
2 Angles 3
'''
# Example 1: subquery-style filter, passing a Series to isin().
# Keeps the rows of df1 whose city also appears in df2's city column.
print(df1[df1["city"].isin(df2["city"])])
'''
city rank
0 Chicago 1
'''
# Example 2: subquery-style filter, passing a plain list to isin().
print(df1[df1["city"].isin(["Chicago", "Newyork"])])
'''
city rank
0 Chicago 1
2 Newyork 3
'''
# Example 3: isin() also accepts a DataFrame. A cell is kept only where the
# column name, the index label and the value all agree between the two
# frames; every other cell is shown as NaN.
print(df1.where(df1.isin(df2)))
'''
city rank
0 Chicago 1
1 NaN 2
2 NaN 3
'''
# 数据更新
# Load the tips data set and keep only its first five rows for the demo.
tips = pd.read_csv("tips.csv").head(5)
print(tips)
'''
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
'''
# Example 1: double every tip that is below 2 (in place, via *= 2).
# Step 1: select the rows whose tip is below 2.
print(tips[tips["tip"] < 2])
'''
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
'''
# Step 2: narrow the selection to the tip column only.
print(tips.loc[tips["tip"] < 2, "tip"])
'''
0 1.01
1 1.66
Name: tip, dtype: float64
'''
# Step 3: multiply those tips by 2 and write the result back into the
# tip column.
tips.loc[tips["tip"] < 2, "tip"] *= 2
print(tips)
'''
total_bill tip sex smoker day time size
0 16.99 2.02 Female No Sun Dinner 2
1 10.34 3.32 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
'''
# Example 2: delete the rows whose tip is above 3.5, i.e. keep the rows
# with tip <= 3.5 and rebind tips to that filtered frame.
print(tips[tips["tip"] <= 3.5])
tips = tips[tips["tip"] <= 3.5]
print(tips)
'''
total_bill tip sex smoker day time size
0 16.99 2.02 Female No Sun Dinner 2
1 10.34 3.32 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
'''
# 网友评论