#coding=utf-8
import pandas as pd
import numpy as np
# Walkthrough of common pandas DataFrame operations: rename, set_index,
# reset_index, drop_duplicates and isin. The expected console output of each
# print() call is recorded in the comment block that follows it (exact column
# spacing may vary slightly between pandas versions).

data = {
    'company': ['B', 'A', 'B', 'B'],
    'gender': ['female', 'female', 'male', 'male'],
    'num': [40, 31, 28, 28],
}

# Build a DataFrame from a dict of equal-length lists (one key per column).
df = pd.DataFrame(data)
print(df)
#   company  gender  num
# 0       B  female   40
# 1       A  female   31
# 2       B    male   28
# 3       B    male   28

# Example 1: rename()
# Rename the column "num" to "age" and relabel the index 0-3 as A-D.
print(df.rename(columns={"num": "age"},
                index={0: "A", 1: "B", 2: "C", 3: "D"}))
#   company  gender  age
# A       B  female   40
# B       A  female   31
# C       B    male   28
# D       B    male   28
#
# Note: the call above only returns a modified copy; df itself is unchanged.
# Pass inplace=True to modify the original df object.
df.rename(columns={"num": "age"},
          index={0: "A", 1: "B", 2: "C", 3: "D"},
          inplace=True)
print(df)
#   company  gender  age
# A       B  female   40
# B       A  female   31
# C       B    male   28
# D       B    male   28

# Example 2: set_index() turns one or more columns into the index.
# Use the "company" column as the index.
print(df.set_index('company'))
#          gender  age
# company
# B        female   40
# A        female   31
# B          male   28
# B          male   28

# Use "company" and "gender" together as a two-level (Multi)index.
# In the printed form a repeated outer label is left blank (last row).
print(df.set_index(['company', 'gender']))
#                 age
# company gender
# B       female   40
# A       female   31
# B       male     28
#         male     28

# inplace=True applies the new index to df itself.
df.set_index(['company', 'gender'], inplace=True)
print(df)
#                 age
# company gender
# B       female   40
# A       female   31
# B       male     28
#         male     28

# Example 3: reset_index() restores the default 0..n-1 integer index.
# drop=False: the old index levels are kept as ordinary columns.
print(df.reset_index(drop=False))
#   company  gender  age
# 0       B  female   40
# 1       A  female   31
# 2       B    male   28
# 3       B    male   28

# drop=True: the old index levels are discarded entirely.
print(df.reset_index(drop=True))
#    age
# 0   40
# 1   31
# 2   28
# 3   28

# inplace=True applies the reset to df itself.
df.reset_index(drop=False, inplace=True)
print(df)
#   company  gender  age
# 0       B  female   40
# 1       A  female   31
# 2       B    male   28
# 3       B    male   28

# Example 4: drop_duplicates(subset, keep) removes duplicated rows.
#   subset - column(s) used to decide whether two rows are duplicates;
#            when omitted, all columns are compared.
#   keep   - "first" (default) keeps the first row of each duplicate group,
#            "last" keeps the last row of each group.

# Only "company" is compared, so the 3rd and 4th rows (both "B") are dropped.
print(df.drop_duplicates(subset=["company"]))
#   company  gender  age
# 0       B  female   40
# 1       A  female   31

# No subset: all columns are compared, so only the 4th row (an exact copy of
# the 3rd) is dropped.
print(df.drop_duplicates())
#   company  gender  age
# 0       B  female   40
# 1       A  female   31
# 2       B    male   28

# Compare on "company" but keep the last occurrence, so of the "B" rows only
# the 4th one survives.
print(df.drop_duplicates(subset=["company"], keep="last"))
#   company  gender  age
# 1       A  female   31
# 3       B    male   28

# Example 5: isin() builds a boolean mask that can be used to filter rows.
print(df)
#   company  gender  age
# 0       B  female   40
# 1       A  female   31
# 2       B    male   28
# 3       B    male   28

# The mask on its own, then the rows selected by passing it to .loc.
print(df["gender"].isin(['male']))
print(df.loc[df["gender"].isin(['male'])])
# 0    False
# 1    False
# 2     True
# 3     True
# Name: gender, dtype: bool
#   company gender  age
# 2       B   male   28
# 3       B   male   28

print(df.loc[df["company"].isin(['A'])])
#   company  gender  age
# 1       A  female   31