美文网首页
pandas基础命令

pandas基础命令

作者: 刘不吃草也能跑 | 来源:发表于2017-05-10 23:33 被阅读0次

    pandas

    • pandas = panel data + data analysis
    • author:Wes McKinney

    Series

    #给数据的索引命名,提高可读性
    from pandas import Series
    obj=Series([4,7,-5,2],['b','d','c','a'])
    
    
    

    DataFrame

    #表格形数据结构
    #create dataframe by passing a dict object
    data = {'state':['Ohio','Ohio','Ohio','Nevada','Nevada'],
            'year':[2000,2001,2002,2001,2002],
            'pop':[1.5,1.7,3.6,2.4,2.9]}
    print DataFrame(data,columns = ['state','year','pop'])
    
    #create dataframe by passing numpy array
    date=pd.date_range('20160101',periods=10)
    df=pd.DataFrame(np.random.randn(10,4),index=date,columns=list('ABCD'))
    

    Index

    #索引对象
    from pandas import DataFrame,Series
    
    
    
    #reindex
    obj=Series(['blue','purple','yellow'],index=[0,2,4])
    print obj.reindex(range(6),method='ffill')
    
    #drop index
    obj.drop(2)
    
    #axis 0行 1列
    import numpy as np
    data=DataFrame(np.arange(16).reshape(4,4),index=['Ohio','Colorado','Utah','NewYork'],columns=['one','two','three','four'])
    data.loc['Colorado',['two','three']]
    

    匿名函数

    import numpy as np
    from pandas import Series,DataFrame
    frame=DataFrame(np.random.randn(4,3),columns=list('bde'),index=['Utah','Ohio','Texas','Oregon'])
    np.abs(frame)
    #函数应用到某列或某行
    f=lambda x:x.max()-x.min()
    print frame.apply(f)
    print frame.apply(f,axis=1)
    
    #函数map到每个元素
    _format=lambda x:'%.2f' % x
    print frame.applymap(_format)
    print frame['e'].map(_format)
    

    排序

    from pandas import DataFrame,Series
    frame=DataFrame(np.random.randn(4,3),columns=list('bde'),index=['Utah','Ohio','Texas','Oregon'])
    frame.sort_index()
    
    

    join

    import pandas as pd
    left = pd.DataFrame({'key':['foo','foo'],'lval':[1,2]})
    right = pd.DataFrame({'key':['foo','foo'],'rval':[4,5]})
    print left
       key  lval
    0  foo     1
    1  foo     2
    print right
       key  rval
    0  foo     4
    1  foo     5
    pd.merge(left,right,on='key')
       key  lval  rval
    0  foo     1     4
    1  foo     1     5
    2  foo     2     4
    3  foo     2     5
    

    groupby

    import pandas as pd
    df=pd.DataFrame({'A':['boo','bar','boo','bar'],'B':['one','two','three','four'],'C':np.random.randn(4),'D':np.random.randn(4)})
    df.groupby('A').sum()
                C         D
    A                      
    bar  0.558913  0.163351
    boo -0.936902 -1.450482
    

    相关文章

      网友评论

          本文标题:pandas基础命令

          本文链接:https://www.haomeiwen.com/subject/hgsntxtx.html