美文网首页
数据分析库pandas

数据分析库pandas

作者: junjun2018 | 来源:发表于2019-04-03 16:31 被阅读0次
    import pandas as pd
    import matplotlib.pyplot as plt
    import math
    
    # --- DataFrame basics ---------------------------------------------
    print("====================创建dataFrame开始=======================")
    # Build a 2x3 frame: rows labelled A/B, columns labelled C/D/E.
    df1 = pd.DataFrame(
        data=[[1, 2, 3], [4, 5, 6]],
        index=["A", "B"],
        columns=["C", "D", "E"],
    )
    # Show the frame itself, its underlying values, and its transpose.
    for view in (df1, df1.values, df1.transpose()):
        print(view)
    # (rows, columns) tuple, followed by the total number of cells.
    print(df1.shape)
    print(df1.size)
    # First row and last row.
    print(df1.head(n=1))
    print(df1.tail(n=1))
    # Per-column summary: count, mean, standard deviation, min,
    # quartiles and max.
    summary = df1.describe()
    print(summary)
    # Select one row by its index label.
    row_a = df1.loc["A"]
    print(row_a)
    print("====================创建dataFrame结束=======================")
    print("====================read csv开始=======================")
    # Load the CSV and use its "id" column as the row index.
    data = pd.read_csv("resource/fff.csv", index_col="id")
    first_rows = data.head(3)
    print(first_rows)
    print(data.shape)
    # A single column: first three entries.
    x_column = data["x"]
    print(x_column.head(3))
    # Several columns at once: first three rows.
    xy_columns = data[["x", "y"]]
    print(xy_columns.head(3))
    # How many times each distinct value of "x" occurs; a filter could be
    # applied beforehand if duplicates need to be removed.
    x_frequencies = x_column.value_counts()
    print(x_frequencies)
    print("====================图形化开始=======================")
    # Plot column "x". plt.show() is not called here; add it after this
    # line to display the figure.
    x_column.plot()
    # Column name -> dtype mapping of the loaded frame.
    print(data.dtypes)
    print("====================图形化结束=======================")
    print("====================read csv结束=======================")
    print("====================时间处理开始=======================")
    timedata = pd.read_csv("resource/timestamp.csv")
    print(timedata.dtypes)
    # Turn the integer "atime" column into datetimes, reading each value
    # as a number of seconds.
    raw_atime = timedata["atime"]
    local_time = pd.to_datetime(raw_atime, unit="s")
    print(local_time)
    # Boolean mask: True for rows whose converted time is after the cutoff.
    cutoff = '2101-10-10'
    condition = local_time > cutoff
    # Keep only the rows selected by the mask.
    late_rows = timedata.loc[condition]
    print(late_rows)
    print("====================时间处理结束=======================")
    
    
    print("====================cal开始=======================")
    # Two points in the plane: (x, y) and (x1, y1).
    x, y = 4.47, 6.55
    x1, y1 = 4.1, 7.61
    # Euclidean distance between them: sqrt(dx^2 + dy^2).
    dx = x1 - x
    dy = y1 - y
    rate = math.sqrt(dx ** 2 + dy ** 2)
    print(rate)
    print("====================cal结束=======================")
    
    /Users/jun/anaconda3/envs/python36/bin/python /Applications/PyCharm.app/Contents/helpers/pydev/pydev_run_in_console.py 51520 51521 /Users/jun/PycharmProjects/liaokepython/wanmenpython/ipandas.py
    Running /Users/jun/PycharmProjects/liaokepython/wanmenpython/ipandas.py
    import sys; print('Python %s on %s' % (sys.version, sys.platform))
    sys.path.extend(['/Users/jun/PycharmProjects/liaokepython', '/Users/jun/PycharmProjects/liaokepython/wanmenpython'])
    ====================创建dataFrame开始=======================
       C  D  E
    A  1  2  3
    B  4  5  6
    [[1 2 3]
     [4 5 6]]
       A  B
    C  1  4
    D  2  5
    E  3  6
    (2, 3)
    6
       C  D  E
    A  1  2  3
       C  D  E
    B  4  5  6
                 C        D        E
    count  2.00000  2.00000  2.00000
    mean   2.50000  3.50000  4.50000
    std    2.12132  2.12132  2.12132
    min    1.00000  2.00000  3.00000
    25%    1.75000  2.75000  3.75000
    50%    2.50000  3.50000  4.50000
    75%    3.25000  4.25000  5.25000
    max    4.00000  5.00000  6.00000
    C    1
    D    2
    E    3
    Name: A, dtype: int64
    ====================创建dataFrame结束=======================
    ====================read csv开始=======================
        areaCode     x     y    z             time  package tagId
    id                                                           
    1          1  4.65  6.55  1.2  2019/3/19 13:42     2209  B832
    2          1  4.47  6.56  1.2  2019/3/19 13:42     2210  B832
    3          1  4.47  6.55  1.2  2019/3/19 13:42     2211  B832
    (169, 7)
    id
    1    4.65
    2    4.47
    3    4.47
    Name: x, dtype: float64
           x     y
    id            
    1   4.65  6.55
    2   4.47  6.56
    3   4.47  6.55
    6.03     3
    5.55     3
    1.63     3
    2.85     2
    6.40     2
    5.83     2
    5.57     2
    5.46     2
    5.65     2
    8.78     2
    2.91     2
    5.08     2
    7.64     2
    3.42     2
    4.65     2
    4.47     2
    9.35     2
    9.24     2
    8.44     2
    5.21     2
    5.58     2
    5.61     2
    9.08     2
    3.56     2
    5.60     2
    8.52     1
    3.92     1
    4.40     1
    0.17     1
    8.74     1
            ..
    7.90     1
    9.28     1
    9.27     1
    6.11     1
    4.06     1
    6.66     1
    8.06     1
    9.66     1
    0.55     1
    2.59     1
    8.29     1
    8.79     1
    10.07    1
    3.39     1
    6.21     1
    7.44     1
    7.40     1
    4.22     1
    5.91     1
    1.17     1
    4.67     1
    8.69     1
    9.09     1
    4.05     1
    6.88     1
    5.59     1
    5.80     1
    7.33     1
    5.70     1
    8.75     1
    Name: x, Length: 141, dtype: int64
    ====================图形化开始=======================
    areaCode      int64
    x           float64
    y           float64
    z           float64
    time         object
    package       int64
    tagId        object
    dtype: object
    ====================图形化结束=======================
    ====================read csv结束=======================
    ====================时间处理开始=======================
    atime    int64
    btime    int64
    dtype: object
    0   2110-06-13 20:25:51
    1   2110-09-02 20:52:31
    2   2100-12-10 15:05:51
    Name: atime, dtype: datetime64[ns]
            atime    btime
    0  4432134351    54335
    1  4439134351  3454543
    ====================时间处理结束=======================
    ====================cal开始=======================
    1.1227199116431494
    ====================cal结束=======================
    PyDev console: starting.
    Python 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) 
    [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)] on darwin
    
    

    相关文章

      网友评论

          本文标题:数据分析库pandas

          本文链接:https://www.haomeiwen.com/subject/mymzbqtx.html