美文网首页编程
Python基础学习13

Python基础学习13

作者: ericblue | 来源:发表于2019-01-28 16:46 被阅读0次

    pandas库安装:

    pip3 install pandas
    Collecting pandas
      Downloading https://files.pythonhosted.org/packages/78/78/50ef81a903eccc4e90e278a143c9a0530f05199f6221d2e1b21025852982/pandas-0.23.4-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (14.6MB)
        100% |████████████████████████████████| 14.7MB 56kB/s
    Requirement already satisfied: numpy>=1.9.0 in /Users/.virtualenvs/py3env/lib/python3.6/site-packages (from pandas) (1.15.4)
    Collecting pytz>=2011k (from pandas)
      Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ReadTimeoutError("HTTPSConnectionPool(host='pypi.org', port=443): Read timed out. (read timeout=15)",)': /simple/pytz/
      Downloading https://files.pythonhosted.org/packages/61/28/1d3920e4d1d50b19bc5d24398a7cd85cc7b9a75a490570d5a30c57622d34/pytz-2018.9-py2.py3-none-any.whl (510kB)
        100% |████████████████████████████████| 512kB 43kB/s
    Collecting python-dateutil>=2.5.0 (from pandas)
      Downloading https://files.pythonhosted.org/packages/74/68/d87d9b36af36f44254a8d512cbfc48369103a3b9e474be9bdfe536abfc45/python_dateutil-2.7.5-py2.py3-none-any.whl (225kB)
        100% |████████████████████████████████| 235kB 26kB/s
    Requirement already satisfied: six>=1.5 in /Users/.virtualenvs/py3env/lib/python3.6/site-packages (from python-dateutil>=2.5.0->pandas) (1.11.0)
    Installing collected packages: pytz, python-dateutil, pandas
    Successfully installed pandas-0.23.4 python-dateutil-2.7.5 pytz-2018.9
    

    pandas的Series一维数组应用方法

    from pandas import Series, DataFrame
    import pandas as pd
    
    obj = Series([4, 5, 6, -7])#pandas一维数组定义
    
    print(obj)
    #输出结果如下是带索引一组数据
    0    4
    1    5
    2    6
    3   -7
    dtype: int64
    
    print( obj.index)
    #输出结果RangeIndex(start=0, stop=4, step=1)
    
    print ( obj.values)
    #输出结果[ 4  5  6 -7]
    

    字典的key必须是可哈希(不可变)的对象,并且在字典中唯一;如果写入相同的key,后写入的value会覆盖之前对应的value。列表(如['a'])和集合(如{'b'})是可变对象、不可哈希,因此不能作为字典的key。

    obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'c', 'a'])# 定义带自定义索引的Series
    print(obj2)
    #输出结果如下
    d    4
    b    7
    c   -5
    a    3
    dtype: int64
    
    obj2['c'] = 6# 可以直接给对应索引给值
    print(obj2)
    # 输出结果如下
    d    4
    b    7
    c    6
    a    3
    dtype: int64
    
    print ('f' in obj2)#可查找是否存在此索引
    #输出结果False
    
    sdata = {
        'beijing': 35000,
        'shanghai': 71000,
        'guangzhou': 16000,
        'shenzhen': 5000}
    obj3 = Series(sdata)#把字典转换为一维数组
    print( obj3)
    #输出结果如下
    beijing      35000
    shanghai     71000
    guangzhou    16000
    shenzhen      5000
    dtype: int64
    
    obj3.index = ['bj', 'sh', 'gz', 'sz']# 修改索引(按位置依次替换,新标签顺序需与原数据顺序一致)
    print( obj3)
    # 输出结果如下
    bj    35000
    sh    71000
    gz    16000
    sz     5000
    dtype: int64
    

    pandas的DataFrame二维表格数据应用方法

    from pandas import Series, DataFrame
    
    #字典中添加列表方式定义多维数据表格
    data = {'city': ['shanghai', 'shanghai', 'shanghai', 'beijing', 'beijing'],
            'year': [2016, 2017, 2018, 2017, 2018],
            'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
    
    frame = DataFrame(data)
    print(frame)
    #输出结果如下
           city  year  pop
    0  shanghai  2016  1.5
    1  shanghai  2017  1.7
    2  shanghai  2018  3.6
    3   beijing  2017  2.4
    4   beijing  2018  2.9
    
    frame2 = DataFrame(data, columns=['year', 'city', 'pop'])#自定义key值排列顺序
    print(frame2)
    #输出结果如下
       year      city  pop
    0  2016  shanghai  1.5
    1  2017  shanghai  1.7
    2  2018  shanghai  3.6
    3  2017   beijing  2.4
    4  2018   beijing  2.9
    
    print(frame2['city'])#提取列值
    #输出结果如下
    0    shanghai
    1    shanghai
    2    shanghai
    3     beijing
    4     beijing
    Name: city, dtype: object
    
    print(frame2.year)#提取列值另一种方法
    #输出结果如下
    0    2016
    1    2017
    2    2018
    3    2017
    4    2018
    Name: year, dtype: int64
    
    frame2['new'] = 100#新增列
    print(frame2)
    #输出结果如下
       year      city  pop  new
    0  2016  shanghai  1.5  100
    1  2017  shanghai  1.7  100
    2  2018  shanghai  3.6  100
    3  2017   beijing  2.4  100
    4  2018   beijing  2.9  100
    
    frame2['cap'] = frame2.city == 'beijing'#带判断新增列
    print( frame2)
    #输出结果如下
       year      city  pop  new    cap
    0  2016  shanghai  1.5  100  False
    1  2017  shanghai  1.7  100  False
    2  2018  shanghai  3.6  100  False
    3  2017   beijing  2.4  100   True
    4  2018   beijing  2.9  100   True
    
    #另一种字典中嵌套方式定义多维数据表格
    pop = {'beijing': {2008: 1.5, 2009: 2.0},
           'shanghai': {2008: 2.0, 2009: 3.6}
           }
    
    frame3 = DataFrame(pop)
    print(frame3)
    #输出结果如下
          beijing  shanghai
    2008      1.5       2.0
    2009      2.0       3.6
    
    print(frame3.T)#列行互换
    #输出结果如下
              2008  2009
    beijing    1.5   2.0
    shanghai   2.0   3.6
    
    
    obj4 = Series([4.5, 7.2, -5.3, 3.6], index=['b', 'd', 'c', 'a'])
    obj5 = obj4.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)#调整索引顺序,并给新增的索引'e'填充默认值0
    print(obj5)
    #输出结果如下
    a    3.6
    b    4.5
    c   -5.3
    d    7.2
    e    0.0
    dtype: float64
    
    obj6 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
    print( obj6.reindex(range(6),method='bfill'))#重建索引时填充缺失值:'ffill'用前一个有效值向后填充,'bfill'用后一个有效值向前填充
    #输出结果如下
    0      blue
    1    purple
    2    purple
    3    yellow
    4    yellow
    5       NaN
    dtype: object
    
    
    from numpy import nan as NA   #应用空值
    
    data = Series([1, NA, 2])#给空值
    print(data.dropna())#删除空值
    #输出结果如下
    0    1.0
    2    2.0
    dtype: float64
    
    data2 = DataFrame([[1., 6.5, 3], [1., NA, NA], [NA, NA, NA]
                      ])
    data2[4] = NA#给第4列给空值
    print(data2)
    #输出结果如下
         0    1    2   4
    0  1.0  6.5  3.0 NaN
    1  1.0  NaN  NaN NaN
    2  NaN  NaN  NaN NaN
    
    print(data2.dropna(how='all'))#删除整行为空的行
    #输出结果如下
         0    1    2   4
    0  1.0  6.5  3.0 NaN
    1  1.0  NaN  NaN NaN
    
    print(data2.dropna(axis=1, how='all'))#删除整列为空的列
    #输出结果如下
         0    1    2
    0  1.0  6.5  3.0
    1  1.0  NaN  NaN
    2  NaN  NaN  NaN
    
    data2.fillna(0)#不带inplace时返回填充后的新对象,data2本身不变
    print(data2.fillna(0, inplace=True))#填充缺失值为0;inplace=True表示直接修改data2本身,此时方法返回None
    #输出结果None
    print(data2)#更新结果后输出被修改
    #输出结果如下
       0    1    2    4
    0  1.0  6.5  3.0  0.0
    1  1.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    
    

    层次化索引

    import numpy as np
    
    #建立两层索引
    data3 = Series(np.random.randn(10),
                   index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                          [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
    print (data3)
    #输出结果如下
    a  1   -0.606962
       2   -0.793390
       3    0.515835
    b  1   -0.269941
       2   -0.613685
       3   -0.078791
    c  1    1.622026
       2   -0.342152
    d  2   -0.331359
       3    0.719142
    dtype: float64
    
    print ( data3['b':'c'])#取索引对应值
    #输出结果如下(np.random.randn每次运行生成的随机数不同,此处输出来自另一次运行,故数值与上面打印的data3不一致)
    b  1    0.024265
       2    0.140279
       3    1.465150
    c  1   -1.049863
       2    1.673730
    dtype: float64
    
    print(data3.unstack())#一维层次化索引转换为二维dataframe数组
    #输出结果如下(来自另一次运行,数值与前面的输出不一致;c没有内层索引3、d没有内层索引1,故对应位置为NaN)
              1         2         3
    a  0.052463 -0.868392  0.387425
    b  0.041187  0.116177 -0.395136
    c  0.585591 -0.465362       NaN
    d       NaN  0.586438 -0.140192
    
    print(data3.unstack().stack())#还原一维层次化索引
    #输出结果如下
    a  1    0.052463
       2   -0.868392
       3    0.387425
    b  1    0.041187
       2    0.116177
       3   -0.395136
    c  1    0.585591
       2   -0.465362
    d  2    0.586438
       3   -0.140192
    dtype: float64
    
    
    
    

    相关文章

      网友评论

        本文标题:Python基础学习13

        本文链接:https://www.haomeiwen.com/subject/psjgdqtx.html