机器学习利器之Numpy

作者: coderzc | 来源:发表于2018-12-04 18:39 被阅读24次
    Numpy
    多维数组

    Numpy 创建N维数组

    import numpy as np
    
    ''' 创建10行10列的数值为浮点0的矩阵 '''
    >>> print("np.zeros\n", np.zeros([10, 10]))
    np.zeros
     [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
    
    ''' 创建10行10列的数值为浮点1的矩阵 '''
    >>> print("np.ones\n", np.ones([10, 10]))
    np.ones
     [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
    
    
    ''' 创建10行10列的单位矩阵:对角线元素为浮点1,其余为浮点0 '''
    >>> print("np.eye\n", np.eye(10, 10))
    np.eye
     [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
    
    ''' 从数值范围创建数组 开始,结束,步长,输出元素类型 '''
    >>> print("np.arange\n", np.arange(0, 100, 2, float))
    np.arange
     [ 0.  2.  4.  6.  8. 10. 12. 14. 16. 18. 20. 22. 24. 26. 28. 30. 32. 34.
     36. 38. 40. 42. 44. 46. 48. 50. 52. 54. 56. 58. 60. 62. 64. 66. 68. 70.
     72. 74. 76. 78. 80. 82. 84. 86. 88. 90. 92. 94. 96. 98.]
    
    '''生成随机数组 5行5列 取值服从[0, 1)上的均匀分布'''
    >>> np.random.rand(5, 5)
    array([[0.79909192, 0.40687012, 0.05833267, 0.90631693, 0.85774438],
           [0.65685319, 0.99620959, 0.64195711, 0.28694344, 0.54805126],
           [0.87347445, 0.20443748, 0.45883044, 0.90017425, 0.17487183],
           [0.4833086 , 0.59498315, 0.75053456, 0.93725983, 0.79870607],
           [0.8908418 , 0.49860926, 0.44097606, 0.53744394, 0.21089092]])
    
    
    ''' 生成在半开半闭区间 [low,high)上离散均匀分布的整数值;若high=None,则取值区间变为[0,low) ; size维度 '''
    >>> np.random.randint(4,10,size=(5, 5))
    array([[6, 6, 6, 7, 9],
           [4, 8, 6, 7, 7],
           [6, 8, 6, 5, 7],
           [7, 8, 8, 4, 5],
           [7, 6, 5, 5, 7]])
    
    
    ''' 给定均值/标准差/维度的正态分布 '''
    >>> np.random.normal(1.75, 0.1, (3, 4))
    array([[1.83246388, 1.73186179, 1.78198763, 1.76844117],
           [1.69089184, 1.69620751, 1.78018062, 1.68086896],
           [1.86462936, 1.61972878, 1.95645574, 1.66104741]])
    
    
    ''' 将列表转换为np数组 '''
    >>> array = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
    >>> np_array = np.array(array, dtype=float)  # copy,新数组
    >>> print("np.array:\n", np_array)
    np.array:
     [[ 1.  2.  3.  4.]
     [ 5.  6.  7.  8.]
     [ 9. 10. 11. 12.]]
    
    >>> np_array2 = np.asarray(array, dtype=float)  # 输入已是dtype相符的ndarray时不复制,直接返回原数组;此处输入是list,仍会新建数组
    >>> print("np.asarray:\n", np_array2)
    np.asarray:
     [[ 1.  2.  3.  4.]
     [ 5.  6.  7.  8.]
     [ 9. 10. 11. 12.]]
    

    查看数组属性

    # 数组元素个数
    >>> print("数组元素个数 size:", np_array.size)
    数组元素个数 size: 12
    # 数组形状
    >>> print("数组形状 shape:", np_array.shape)
    数组形状 shape: (3, 4)
    # 数组维度
    >>> print("数组维度 ndim:", np_array.ndim)
    数组维度 ndim: 2
    # 数组元素类型
    >>> print("数组元素类型 dtype:", np_array.dtype)
    数组元素类型 dtype: float64
    # 数组中每个元素的字节大小
    >>> print("数组元素类型 itemsize:", np_array.itemsize)
    数组元素类型 itemsize: 8
    

    shape操作

    >>> array = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
    >>> n1 = np.asarray(array)
    
    # 改变数组的格式
    >>> n2 = n1.reshape(6, 2)
    >>> print(n1)
    [[ 1  2  3  4]
     [ 5  6  7  8]
     [ 9 10 11 12]]
    >>> print(n2)
    [[ 1  2]
     [ 3  4]
     [ 5  6]
     [ 7  8]
     [ 9 10]
     [11 12]]
    
    # 将多维降到1维展开
    >>> print("flatten():", n2.flatten())  # copy,新数组
    flatten(): [ 1  2  3  4  5  6  7  8  9 10 11 12]
    >>> print("ravel():", n2.ravel())  # 尽可能返回view,修改返回值会影响原数组,但原数组的shape不变
    ravel(): [ 1  2  3  4  5  6  7  8  9 10 11 12]
    
    # 转置
    >>> n3 = np.arange(12)
    >>> n3 = n3.reshape(3, 4)
    >>> print("n3:", n3)
    n3: [[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]]
    
    >>> print("n3.T:", n3.T)
    n3.T: [[ 0  4  8]
     [ 1  5  9]
     [ 2  6 10]
     [ 3  7 11]]
    
    # reshape一些特殊值
    >>> n4 = np.arange(10, 130, 10)
    >>> print("n4:", n4.reshape(4, 3))
    n4: [[ 10  20  30]
     [ 40  50  60]
     [ 70  80  90]
     [100 110 120]]
    
    #   -1 一维展开 与 ravel()作用相似
    >>> print(n4.reshape(-1))
    [ 10  20  30  40  50  60  70  80  90 100 110 120]
    
    #   (-1,1) n行,1列
    >>> print(n4.reshape(-1, 1))
    [[ 10]
     [ 20]
     [ 30]
     [ 40]
     [ 50]
     [ 60]
     [ 70]
     [ 80]
     [ 90]
     [100]
     [110]
     [120]]
    
    #   (1,-1) 1行,n列,但仍然是二维矩阵
    >>> print(n4.reshape(1, -1))
    [[ 10  20  30  40  50  60  70  80  90 100 110 120]]
    

    数组索引和迭代

    >>> print('\n\n')
    >>> n5 = np.arange(30)
    >>> print('n5:', n5)
    n5: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
     24 25 26 27 28 29]
    
    # 获取第一个元素
    >>> print(n5[0])
    0
    
    # 获取倒数第一个元素
    >>> print(n5[-1])
    29
    
    # 取前十个数
    >>> print(n5[:10])
    [0 1 2 3 4 5 6 7 8 9]
    
    # 取后十个数
    >>> print(n5[-10:])
    [20 21 22 23 24 25 26 27 28 29]
    
    # 取第11-20个数,左闭右开
    >>> print(n5[10:20])
    [10 11 12 13 14 15 16 17 18 19]
    
    # 前十个数中,每2个数取一个
    >>> print(n5[:10:2])
    [0 2 4 6 8]
    
    # 第6-15个数中,每3个数取一个
    >>> print(n5[5:15:3])
    [ 5  8 11 14]
    
    # 所有的数中,每10个数取一个
    >>> print(n5[::10])
    [ 0 10 20]
    
    # 什么都不写,得到整个数组的视图(view);与Python列表不同,这不是副本,复制需用 n5.copy()
    >>> print(n5[:])
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
     24 25 26 27 28 29]
    
    
    ############### 多维数组索引与切片 ###############
    >>> n6 = n5.reshape(5, 6)
    >>> print('n6:', n6)
    n6: [[ 0  1  2  3  4  5]
     [ 6  7  8  9 10 11]
     [12 13 14 15 16 17]
     [18 19 20 21 22 23]
     [24 25 26 27 28 29]]
    
    #   索引第二行第三列的元素
    >>> print('n6[1, 2]:', n6[1, 2])
    n6[1, 2]: 8
    
    #   在第一维取前两行,第二维以步长2取元素(每隔一个取一个)
    >>> print('n6[:2, ::2]:\n', n6[:2, ::2])
    n6[:2, ::2]:
     [[ 0  2  4]
     [ 6  8 10]]
    
    # 取第一列
    >>> print('n6[:, 0]]:\n', n6[:, 0])
    n6[:, 0]]:
     [ 0  6 12 18 24]
    
    # 取第4、5列(索引3、4)
    >>> print('n6[:, 3:5]]:\n', n6[:, 3:5])
    n6[:, 3:5]]:
     [[ 3  4]
     [ 9 10]
     [15 16]
     [21 22]
     [27 28]]
    

    拼接、分割

    >>> A=np.arange(10,20).reshape(5,2)
    >>> print(A)
    [[10 11]
     [12 13]
     [14 15]
     [16 17]
     [18 19]]
    
    >>> B=np.arange(20,30).reshape(5,2)
    >>> print(B)
    [[20 21]
     [22 23]
     [24 25]
     [26 27]
     [28 29]]
    
    # 垂直拼接
    >>> C=np.vstack([A,B]) #C=np.r_[A,B]
    >>> print(C)
    [[10 11]
     [12 13]
     [14 15]
     [16 17]
     [18 19]
     [20 21]
     [22 23]
     [24 25]
     [26 27]
     [28 29]]
    
    # 自我堆叠
    >>> v=np.asarray([1,2])
    >>> a=np.vstack([v]*2)
    >>> print(a)
    [[1 2]
     [1 2]]
    # np.tile(v,(2,1)):纵向(行方向)重复两次,横向(列方向)重复一次
    >>> b=np.tile(v,(2,1))
    >>> print(b)
    [[1 2]
     [1 2]]
    
    # 水平拼接
    >>> C=np.hstack([A,B]) #C=np.c_[A,B]
    >>> print(C)
    [[10 11 20 21]
     [12 13 22 23]
     [14 15 24 25]
     [16 17 26 27]
     [18 19 28 29]]
    
    #列组合column_stack([A,B]) 一维数组:按列方向组合  二维数组:同hstack一样
    >>> A=np.arange(10,20)
    >>> print(A)
    [10 11 12 13 14 15 16 17 18 19]
    #行组合row_stack([A,B]) 一维数组:按行方向组合  二维数组:同vstack一样
    

    基础运算

    >>> n7 = np.asarray([10, 20, 30,40])
    >>> n8 = np.arange(4)
    >>> print(n7)
    [10 20 30 40]
    >>> print(n8)
    [0 1 2 3]
    
    # 计算立方
    >>> print(n7**3)
    [ 1000  8000 27000 64000]
    
    # 三角函数
    >>> print(np.sin(n7))
    [-0.54402111  0.91294525 -0.98803162  0.74511316]
    
    # 指定轴最大/小值
    >>> print(np.amax(n7, axis=0))
    40
    >>> print(np.amin(n7, axis=0))
    10
    
    # 平均值
    >>> print(np.mean(n7, axis=0))
    25.0
    
    # 中位数
    >>> print(np.median(n7))
    25.0
    
    # 方差
    >>> print(n7.var())
    125.0
    
    # 标准差
    >>> print(np.std(n7, axis=0))
    11.180339887498949
    
    # 差值
    >>> print("n7-n8:",n7-n8)
    n7-n8: [10 19 28 37]
    
    
    # 逐个相乘非矩阵乘法
    >>> n9 = np.asarray([[1,1],[0,1]])
    >>> print(n9)
    [[1 1]
     [0 1]]
    >>> n10=np.arange(4).reshape((2,2))
    >>> print(n10)
    [[0 1]
     [2 3]]
    >>> print(n9 * n10) 
    [[0 1]
     [0 3]]
    

    矩阵计算

    # Ax=B 求解x
    >>> A = np.array([[2, 1, -2], [3, 0, 1], [1, 1, -1]])
    >>> B = np.transpose(np.array([[-3, 5, -2]]))
    >>> x = np.linalg.solve(A, B)
    >>> print('x:\n', x)
    x:
     [[ 1.]
     [-1.]
     [ 2.]]
    
    # 矩阵相乘 C=AB  求解C
    >>> A = np.array([[3, 2, -2], [3, 1, 4], [3, 1, -2]])
    >>> B = np.arange(9).reshape((3,3))
    >>> C = np.dot(A, B)
    >>> print('C:\n',C)
    C:
     [[-6 -3  0]
     [27 35 43]
     [-9 -7 -5]]
    
    
    # 矩阵乘向量
    >>> v=np.asarray([1,2])
    >>> print(v)
    [1 2]
    >>> A=np.arange(1,5).reshape(2,2)
    >>> print(A)
    [[1 2]
     [3 4]]
    >>> D=v.dot(A)
    >>> print(D)
    [ 7 10]
    # 自动将v转换为列向量,结果又自动转化为行向量
    >>> C=A.dot(v)
    >>> print(C)
    [ 5 11]
    
    
    # 矩阵的逆
    >>> A=np.arange(1,5).reshape(2,2)
    >>> print(A)
    [[1 2]
     [3 4]]
    >>> invA=np.linalg.inv(A)
    >>> print(invA)
    [[-2.   1. ]
     [ 1.5 -0.5]]
    #   矩阵乘以矩阵的逆等于单位矩阵对角线都为1,其他为0,这里有浮点误差
    >>> print(A.dot(invA))
    [[1.00000000e+00 1.11022302e-16]
     [0.00000000e+00 1.00000000e+00]]
    
    # 对于非方阵求伪逆矩阵
    >>> A=np.arange(1,11).reshape(2,5)
    >>> print(A)
    [[ 1  2  3  4  5]
     [ 6  7  8  9 10]]
    >>> pinvA=np.linalg.pinv(A)
    >>> print(pinvA)
    [[-0.36  0.16]
     [-0.2   0.1 ]
     [-0.04  0.04]
     [ 0.12 -0.02]
     [ 0.28 -0.08]]
    >>> print(A.dot(pinvA))
    [[ 1.00000000e+00 -1.11022302e-16]
     [-8.88178420e-16  1.00000000e+00]]
    

    排序与arg

    >>> x=np.asarray([2,6,7,1,4,5,8,3,10,9])
    >>> print(x)
    [ 2  6  7  1  4  5  8  3 10  9]
    
    # argxxx 索引函数
    >>> print(np.argmax(x)) #最大数的索引为8
    8
    >>> print(np.argmin(x))
    3
    
    # 排序
    >>> print(np.sort(x))
    [ 1  2  3  4  5  6  7  8  9 10]
    
    # x原地排序
    >>> x.sort()
    >>> print(x)
    [ 1  2  3  4  5  6  7  8  9 10]
    
    #打乱顺序
    >>> np.random.shuffle(x)
    >>> print(x)
    [ 9  3  5  6  2  1  8  4  7 10]
    
    # 返回排序索引
    >>> print(np.argsort(x))
    [5 4 1 7 2 3 8 6 0 9]
    
    # 以排序后位于索引3的元素(第4小的数)为界划分:比它小的在左侧,不小于它的在右侧,两侧内部不保证有序 (快排子过程)
    >>> print(np.partition(x,3))
    [ 2  1  3  4  5  6  8  7  9 10]
    

    FancyIndexing与np比较

    >>> x=np.arange(16)
    >>> print(x)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
    
    # FancyIndexing
    >>> a=[3,5,8]
    >>> print(x[a])
    [3 5 8]
    
    # np数组比较
    >>> x<3
    array([ True,  True,  True, False, False, False, False, False, False,
           False, False, False, False, False, False, False])
    
    >>> 2*x==24-4*x
    array([False, False, False, False,  True, False, False, False, False,
           False, False, False, False, False, False, False])
    
    # 小于等于3的元素True/False序列
    >>> i=(x<=3)
    >>> print(i)
    [ True  True  True  True False False False False False False False False
     False False False False]
    >>> print(x[i])
    [0 1 2 3]
    >>> np.sum(i) # 对值为True的元素累加计数
    4
    
    # 是否含有零元素
    >>> np.any(x==0)
    True
    
    # 是否都等于零
    >>> np.all(x==0)
    False
    
    # 判断两个数组是否相等
    >>> np.all(x==x)
    True
    

    读取数据

    >>> np.genfromtxt("http://aima.cs.berkeley.edu/data/iris.csv", delimiter=",",skip_header=0,dtype="f8,f8,f,i4,|S8")
    array([(5.1, 3.5, 1.4, 0, b'setosa'), (4.9, 3. , 1.4, 0, b'setosa'),
           (4.7, 3.2, 1.3, 0, b'setosa'), (4.6, 3.1, 1.5, 0, b'setosa'),
           (5. , 3.6, 1.4, 0, b'setosa'), (5.4, 3.9, 1.7, 0, b'setosa'),
           (4.6, 3.4, 1.4, 0, b'setosa'), (5. , 3.4, 1.5, 0, b'setosa'),
           (4.4, 2.9, 1.4, 0, b'setosa'), (4.9, 3.1, 1.5, 0, b'setosa'),
           (5.4, 3.7, 1.5, 0, b'setosa'), (4.8, 3.4, 1.6, 0, b'setosa'),
      ......
      ......
           (5.8, 2.7, 5.1, 1, b'virginic'), (6.8, 3.2, 5.9, 2, b'virginic'),
           (6.7, 3.3, 5.7, 2, b'virginic'), (6.7, 3. , 5.2, 2, b'virginic'),
           (6.3, 2.5, 5. , 1, b'virginic'), (6.5, 3. , 5.2, 2, b'virginic'),
           (6.2, 3.4, 5.4, 2, b'virginic'), (5.9, 3. , 5.1, 1, b'virginic')],
          dtype=[('f0', '<f8'), ('f1', '<f8'), ('f2', '<f8'), ('f3', '<i4'), ('f4', 'S8')])
    

    相关文章

      网友评论

        本文标题:机器学习利器之Numpy

        本文链接:https://www.haomeiwen.com/subject/ezlvcqtx.html