美文网首页
Pandas简单运用

Pandas简单运用

作者: 自学java的菜鸟小赵 | 来源:发表于2020-10-17 15:22 被阅读0次

    直接复制到编辑器里面学习就好了,哪个函数不清楚再去了解

    例1

    import pandas as pd
    import numpy as np
    
    # Example 1: a tour of basic DataFrame inspection, selection and mutation.
    data = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("123"), columns=list("abcd"))
    print(data.head(1))
    # NOTE: `info` is referenced without call parentheses, so this prints the
    # repr of the bound method (which embeds the whole frame). Call
    # data.info() to get the column/dtype summary instead.
    print(data.info)
    print(data.describe())  # summary statistics for every column
    # `by` names the column to sort on; ascending=False sorts in descending
    # order (ascending=True would sort in ascending order).
    print(data.sort_values(by='c', ascending=False))
    print("*"*100)
    # Label-based lookup of one cell, done in a single .loc call instead of
    # chaining two [] lookups.
    print(data.loc['1', 'a'])
    # Position-based lookup: 2nd row, 3rd column -> 6. A single .iloc call
    # avoids integer-key [] on a string-labelled Series, which newer pandas
    # versions deprecate.
    print(data.iloc[1, 2])
    print("*"*100)
    # ------------------------------------------------------
    # Insert a column named 'e' at position 4, filled with 1
    data.insert(4, 'e', 1)
    print(data)
    # Read the first row
    print(data[:1])
    # Read several rows by position
    print('读取多行\n', data.iloc[[1, 2]])
    # Read several columns
    print(data[['a', 'b']])
    # Read column a and convert it to an array
    print(np.array(data.a))
    # Convert every row after the first into one flat 1-D array
    print(np.array(data[1:]).flatten())

    # Append a row. NOTE: the label is the integer 4 while the existing labels
    # are the strings '1'..'3', so the index ends up with mixed label types.
    data.loc[4] = [1, 2, 3, 4, 5]
    print('增加行\n', data)
    # Append a column; the list needs one value per row
    data['f'] = [1, 2, 3, 4]
    print('增加列\n', data)
    print("*"*100)
    # A column can be read directly by its name
    print(data['b'])
    print("*"*100)
    print(data[1:]['c'])
    print('data.index->******', list(data.index))
    print("*"*100)
    # Building a frame from dicts with differing keys leaves NaN where a key is missing
    data2 = [{"name": 'zhangsan', 'age': 12}, {'age': 12, 'tel': 113}, {"name": 'zhangsan', 'age': 12, 'tel': 113}]
    data2_dataframe = pd.DataFrame(data2)
    print(data2_dataframe)

    print("*"*100)
    mean = data2_dataframe['tel'].mean()
    print(mean)
    # Fill missing values with each column's mean. numeric_only=True restricts
    # the means to the numeric columns: without it, pandas 2.x tries to average
    # the string column 'name' and raises TypeError. 'name' has no mean, so its
    # NaN is left unfilled. A single column's mean can also be taken directly,
    # as with data2_dataframe['tel'].mean() above.
    data2_dataframe = data2_dataframe.fillna(data2_dataframe.mean(numeric_only=True))
    print(data2_dataframe)

    print("*"*100)
    data3 = [[1, 2, 3], [4, 5, 6], [1, 2, 3], [2, 3, 4]]
    data4 = np.array(data3)
    print(type(data4))
    # Flatten to 1-D, then use a set to keep only the distinct values
    array = data4.flatten()
    print(set(array))
    

    输出

    E:\Python\python.exe E:/ideaproject/pythonProject/matplotlib/day04/pandas03.py
       a  b  c  d
    1  0  1  2  3
    <bound method DataFrame.info of    a  b   c   d
    1  0  1   2   3
    2  4  5   6   7
    3  8  9  10  11>
             a    b     c     d
    count  3.0  3.0   3.0   3.0
    mean   4.0  5.0   6.0   7.0
    std    4.0  4.0   4.0   4.0
    min    0.0  1.0   2.0   3.0
    25%    2.0  3.0   4.0   5.0
    50%    4.0  5.0   6.0   7.0
    75%    6.0  7.0   8.0   9.0
    max    8.0  9.0  10.0  11.0
       a  b   c   d
    3  8  9  10  11
    2  4  5   6   7
    1  0  1   2   3
    ****************************************************************************************************
    0
    6
    ****************************************************************************************************
       a  b   c   d  e
    1  0  1   2   3  1
    2  4  5   6   7  1
    3  8  9  10  11  1
       a  b  c  d  e
    1  0  1  2  3  1
    读取多行
        a  b   c   d  e
    2  4  5   6   7  1
    3  8  9  10  11  1
       a  b
    1  0  1
    2  4  5
    3  8  9
    [0 4 8]
    [ 4  5  6  7  1  8  9 10 11  1]
    增加行
        a  b   c   d  e
    1  0  1   2   3  1
    2  4  5   6   7  1
    3  8  9  10  11  1
    4  1  2   3   4  5
    增加列
        a  b   c   d  e  f
    1  0  1   2   3  1  1
    2  4  5   6   7  1  2
    3  8  9  10  11  1  3
    4  1  2   3   4  5  4
    ****************************************************************************************************
    1    1
    2    5
    3    9
    4    2
    Name: b, dtype: int64
    ****************************************************************************************************
    2     6
    3    10
    4     3
    Name: c, dtype: int64
    data.index->****** ['1', '2', '3', 4]
    ****************************************************************************************************
           name  age    tel
    0  zhangsan   12    NaN
    1       NaN   12  113.0
    2  zhangsan   12  113.0
    ****************************************************************************************************
    113.0
           name  age    tel
    0  zhangsan   12  113.0
    1       NaN   12  113.0
    2  zhangsan   12  113.0
    ****************************************************************************************************
    <class 'numpy.ndarray'>
    {1, 2, 3, 4, 5, 6}
    
    Process finished with exit code 0
    
    

    例2

    import pandas as pd
    import numpy as np
    
    # Example 2: DataFrame.join aligns two frames on their row labels.
    # df1 has rows A and B; df2 has rows A, B and C.
    df1 = pd.DataFrame(np.ones(shape=(2, 4)), index=list('AB'), columns=['a', 'b', 'c', 'd'])
    print('df1\n', df1)

    df2 = pd.DataFrame(np.zeros(shape=(3, 3)), index=list('ABC'), columns=['x', 'y', 'z'])
    print('df2\n', df2)

    # Left join (the default): keep df1's rows, so row C of df2 is dropped
    data1 = df1.join(df2, how='left')
    print('data1\n', data1)

    # Keep df2's rows instead: row C has no match in df1 and gets NaN for a..d
    data2 = df2.join(df1, how='left')

    print('data2\n', data2)
    

    输出

    df1
          a    b    c    d
    A  1.0  1.0  1.0  1.0
    B  1.0  1.0  1.0  1.0
    df2
          x    y    z
    A  0.0  0.0  0.0
    B  0.0  0.0  0.0
    C  0.0  0.0  0.0
    data1
          a    b    c    d    x    y    z
    A  1.0  1.0  1.0  1.0  0.0  0.0  0.0
    B  1.0  1.0  1.0  1.0  0.0  0.0  0.0
    data2
          x    y    z    a    b    c    d
    A  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    B  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    C  0.0  0.0  0.0  NaN  NaN  NaN  NaN
    

    例3

    import numpy as np
    import pandas as pd
    
    # Example 3: DataFrame.merge with the different join types, keyed on column 'a'.
    df1 = pd.DataFrame(np.ones((2, 4)), index=['A', 'B'], columns=list('abcd'))
    print('df1\n', df1)

    df2 = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list("fax"))
    print('df2\n', df2)

    # Inner join on column 'a' (the default): only keys present in both frames
    merge = df1.merge(df2, on='a')
    print('merge\n', merge)

    # Change one key so that only row B of df1 still matches df2.
    # A single .loc[row, col] assignment writes into df1 itself; the chained
    # form df1.loc['A']['a'] = 100 may assign to a temporary copy and leave
    # df1 unchanged.
    df1.loc['A', 'a'] = 100
    merge2 = df1.merge(df2, on='a')
    print('merge2\n', merge2)

    # Outer join: union of the keys from both frames
    merge_outer = df1.merge(df2, on='a', how='outer')
    print('merge_outer\n', merge_outer)
    # Left join: keep every row of df1
    merge_left = df1.merge(df2, on='a', how='left')
    print('merge_left\n', merge_left)
    # Right join: keep every row of df2
    merge_right = df1.merge(df2, on='a', how='right')
    print('merge_right\n', merge_right)
    

    输出

    df1
          a    b    c    d
    A  1.0  1.0  1.0  1.0
    B  1.0  1.0  1.0  1.0
    df2
        f  a  x
    0  0  1  2
    1  3  4  5
    2  6  7  8
    merge
          a    b    c    d  f  x
    0  1.0  1.0  1.0  1.0  0  2
    1  1.0  1.0  1.0  1.0  0  2
    merge2
          a    b    c    d  f  x
    0  1.0  1.0  1.0  1.0  0  2
    merge_outer
            a    b    c    d    f    x
    0  100.0  1.0  1.0  1.0  NaN  NaN
    1    1.0  1.0  1.0  1.0  0.0  2.0
    2    4.0  NaN  NaN  NaN  3.0  5.0
    3    7.0  NaN  NaN  NaN  6.0  8.0
    merge_left
            a    b    c    d    f    x
    0  100.0  1.0  1.0  1.0  NaN  NaN
    1    1.0  1.0  1.0  1.0  0.0  2.0
    merge_right
          a    b    c    d  f  x
    0  1.0  1.0  1.0  1.0  0  2
    1  4.0  NaN  NaN  NaN  3  5
    2  7.0  NaN  NaN  NaN  6  8
    
    Process finished with exit code 0
    

    例4

    import pandas as pd
    import numpy as np
    
    # Example 4: grouping and aggregation.
    data1 = pd.DataFrame(
        np.arange(24).reshape(4, 6),
        index=list("abcd"),
        columns=['q', 'w', 'e', 'r', 't', 'y'],
    )
    print('data1\n', data1)

    # Overwrite the 2nd and 3rd rows with 100 so they share the same 'r' value
    data1.iloc[1:3] = 100
    print(data1)

    grouped = data1.groupby('r')

    # Iterating a GroupBy yields (group key, sub-frame) pairs
    for group_key, group_frame in grouped:
        print(group_key)
        print("*" * 100)
        print(group_frame)

    print("*" * 100)

    # Number of rows in each group
    count = grouped['r'].count()
    print('count\n', count)
    

    输出

    data1
         q   w   e   r   t   y
    a   0   1   2   3   4   5
    b   6   7   8   9  10  11
    c  12  13  14  15  16  17
    d  18  19  20  21  22  23
         q    w    e    r    t    y
    a    0    1    2    3    4    5
    b  100  100  100  100  100  100
    c  100  100  100  100  100  100
    d   18   19   20   21   22   23
    3
    ****************************************************************************************************
       q  w  e  r  t  y
    a  0  1  2  3  4  5
    21
    ****************************************************************************************************
        q   w   e   r   t   y
    d  18  19  20  21  22  23
    100
    ****************************************************************************************************
         q    w    e    r    t    y
    b  100  100  100  100  100  100
    c  100  100  100  100  100  100
    ****************************************************************************************************
    count
     r
    3      1
    21     1
    100    2
    Name: r, dtype: int64
    

    例5

    import pandas as pd
    import numpy as np
    
    # Example 5: grouping/aggregation practice and summary, using a two-level index.
    data = pd.DataFrame({
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': list("hjklmno"),
    })
    print("data****\n", data)

    # Use columns c and d as a two-level index; drop=False keeps them as columns too
    data2 = data.set_index(['c', 'd'], drop=False)
    print('data2****\n', data2)

    # Column a as a Series indexed by (d, c)
    indexed_by_d_c = data.set_index(['d', 'c'])
    data3 = indexed_by_d_c['a']
    print('data3*****\n', data3)

    # Swap the two index levels so the order becomes (c, d)
    data4 = data3.swaplevel(0, 1)
    print('data4*****\n', data4)

    # Select outer label 'one' first, then inner label 'h' from the result
    rows_for_one = data2.loc['one']
    data5 = rows_for_one.loc['h']
    print('data5*****\n', data5)
    

    输出

    data****
        a  b    c  d
    0  0  7  one  h
    1  1  6  one  j
    2  2  5  one  k
    3  3  4  two  l
    4  4  3  two  m
    5  5  2  two  n
    6  6  1  two  o
    data2****
            a  b    c  d
    c   d              
    one h  0  7  one  h
        j  1  6  one  j
        k  2  5  one  k
    two l  3  4  two  l
        m  4  3  two  m
        n  5  2  two  n
        o  6  1  two  o
    data3*****
     d  c  
    h  one    0
    j  one    1
    k  one    2
    l  two    3
    m  two    4
    n  two    5
    o  two    6
    Name: a, dtype: int64
    data4*****
     c    d
    one  h    0
         j    1
         k    2
    two  l    3
         m    4
         n    5
         o    6
    Name: a, dtype: int64
    data5*****
     a      0
    b      7
    c    one
    d      h
    Name: h, dtype: object
    
    Process finished with exit code 0
    

    相关文章

      网友评论

          本文标题:Pandas简单运用

          本文链接:https://www.haomeiwen.com/subject/gexfmktx.html