pandas
- pandas = panel data + data analysis
- author: Wes McKinney
Series
from pandas import Series

# Give each value a named index label, which makes the data easier to read.
obj = Series(data=[4, 7, -5, 2], index=['b', 'd', 'c', 'a'])
DataFrame
# DataFrame: a tabular (table-shaped) data structure.
# Create a DataFrame by passing a dict of equal-length lists.
from pandas import DataFrame

data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# Passing columns= fixes the column order of the resulting table.
# print(...) with a single argument works on both Python 2 and Python 3.
print(DataFrame(data, columns=['state', 'year', 'pop']))
# Create a DataFrame by passing a NumPy array, with a date index and
# explicit column labels.
import numpy as np
import pandas as pd

# Ten consecutive daily timestamps starting at 2016-01-01.
date = pd.date_range('20160101', periods=10)
# 10x4 random values; list('ABCD') expands to ['A', 'B', 'C', 'D'].
df = pd.DataFrame(np.random.randn(10, 4), index=date, columns=list('ABCD'))
Index
# Index objects
from pandas import DataFrame, Series

# reindex: conform the Series to a new index. method='ffill' forward-fills
# the labels that are missing from the original index (1, 3 and 5 here).
obj = Series(['bule', 'purple', 'yellow'], index=[0, 2, 4])
filled = obj.reindex(range(6), method='ffill')
print(filled)
# drop index: returns a new Series without the given label. The labels are
# integers, so the label must be passed as 1, not the string '1'; it is
# dropped from the reindexed Series because obj itself has no label 1.
filled.drop(1)
# axis 0 = rows, axis 1 = columns
import numpy as np
from pandas import DataFrame

data = DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Chio', 'Colorado', 'Utah', 'NewYork'],
    columns=['one', 'two', 'three', 'four'],
)
# Label-based selection of one row and two columns. The old .ix indexer was
# removed in pandas 1.0; .loc is the label-based replacement.
data.loc['Colorado', ['two', 'three']]
匿名函数
import numpy as np
from pandas import Series, DataFrame

frame = DataFrame(
    np.random.randn(4, 3),
    columns=list('bde'),
    index=['Utah', 'Chio', 'Texas', 'Oregon'],
)
# NumPy ufuncs operate element-wise on a DataFrame.
np.abs(frame)
# Apply a function to each column or each row.
f = lambda x: x.max() - x.min()
# Default axis=0: f receives each column, giving one value per column.
print(frame.apply(f))
# axis=1: f receives each row, giving one value per row.
print(frame.apply(f, axis=1))
# Map a function over every element.
_format = lambda x: '%.2f' % x
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; mapping each column with Series.map is used here instead.
print(frame.apply(lambda column: column.map(_format)))
print(frame['e'].map(_format))
排序
import numpy as np
from pandas import DataFrame, Series

frame = DataFrame(
    np.random.randn(4, 3),
    columns=list('bde'),
    index=['Utah', 'Chio', 'Texas', 'Oregon'],
)
# sort_index returns a new DataFrame ordered by row label; frame itself is
# left unchanged.
frame.sort_index()
join
import pandas as pd

left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
print(left)
#    key  lval
# 0  foo     1
# 1  foo     2
print(right)
#    key  rval
# 0  foo     4
# 1  foo     5
# Both frames repeat the key 'foo' twice, so the join pairs every left row
# with every right row: 2 x 2 = 4 result rows.
pd.merge(left, right, on='key')
#    key  lval  rval
# 0  foo     1     4
# 1  foo     1     5
# 2  foo     2     4
# 3  foo     2     5
groupby
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'A': ['boo', 'bar', 'boo', 'bar'],
    'B': ['one', 'two', 'three', 'four'],
    'C': np.random.randn(4),
    'D': np.random.randn(4),
})
# Group the rows by the value in column A and sum the numeric columns.
# C and D are selected explicitly: recent pandas versions no longer drop the
# string column B silently when summing.
df.groupby('A')[['C', 'D']].sum()
# Example output (the values differ on every run because the data is random):
#             C         D
# A
# bar  0.558913  0.163351
# boo -0.936902 -1.450482
网友评论