美文网首页Python
python第十天

python第十天

作者: code与有荣焉 | 来源:发表于2019-11-01 19:51 被阅读0次

    一、Numpy知识点

    import numpy as np
    x = np.arange(10)
    x # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) # 一维向量
    X = np.arange(15).reshape((5,3))
    X
    # array([[ 0,  1,  2], # 二维矩阵 
    #        [ 3,  4,  5],
    #        [ 6,  7,  8],
    #        [ 9, 10, 11],
    #        [12, 13, 14]])
    

    1. 访问

    x[2]
    # 2
    X[1,1]
    # 4
    

    2. 切片

    格式:[行切片,列切片]

    x[5:]
    # array([5, 6, 7, 8, 9])
    # 行切片,列切片
    X[2:4,1:]
    # array([[ 7,  8],
    #        [10, 11]])
    X[2:,:2]
    # array([[ 6,  7],
    #        [ 9, 10],
    #        [12, 13]])
    

    如果不想手动计算reshape的某一个维度,可以把该维度写成-1,numpy会根据元素总数和其余维度自动推导出它的大小

    x.reshape(5,-1)
    # array([[0, 1],
    #        [2, 3],
    #        [4, 5],
    #        [6, 7],
    #        [8, 9]])
    

    3. numpy的运算

    numpy's universal function

    X
        array([[ 0,  1,  2],
               [ 3,  4,  5],
               [ 6,  7,  8],
               [ 9, 10, 11],
               [12, 13, 14]])
    X + 1
        array([[ 1,  2,  3],
               [ 4,  5,  6],
               [ 7,  8,  9],
               [10, 11, 12],
               [13, 14, 15]])
    X * 2
        array([[ 0,  2,  4],
               [ 6,  8, 10],
               [12, 14, 16],
               [18, 20, 22],
               [24, 26, 28]])
    np.sin(X)
        array([[ 0.        ,  0.84147098,  0.90929743],
               [ 0.14112001, -0.7568025 , -0.95892427],
               [-0.2794155 ,  0.6569866 ,  0.98935825],
               [ 0.41211849, -0.54402111, -0.99999021],
               [-0.53657292,  0.42016704,  0.99060736]])
    

    4. NUMPY中的argsort()

    argsort返回的是索引序列:按这些索引依次取元素,就能得到从小到大排好序的数组(返回的不是排序后的值本身)

    x = np.arange(16)
    x
        array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
    # 随机打乱
    # from random import shuffle
    np.random.shuffle(x)
    x
        array([ 1,  8,  6, 12,  4, 14, 13,  7,  0, 15, 11,  3,  9,  5,  2, 10])
    # argsort返回能使数组从小到大有序的索引序列(不是排序后的值)
    np.argsort(x)
        array([ 8,  0, 14, 11,  4, 13,  2,  7,  1, 12, 15, 10,  3,  6,  5,  9],
              dtype=int64)
    

    5.Numpy 中的布尔索引

    布尔型索引可以应用于数据的筛选
    布尔型索引应用于修改值

    names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
    names
        array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
    # 使用np.random模块的randn生成一些正态分布的随机数据
    data = np.random.randn(7,4)
    data
        array([[ 1.8450457 ,  1.91791784, -0.90133072, -0.96715706],
               [ 0.26275727,  1.27134679,  0.33692668, -1.00586409],
               [-0.60143482, -1.41361787,  0.62431237,  0.50040347],
               [ 0.0129754 ,  1.92856064,  1.3711845 , -1.17564517],
               [ 1.43999704, -0.87670553,  0.23952736, -0.64149065],
               [-0.81460157, -1.7537682 , -0.82011688, -0.29424883],
               [-1.00896275, -1.38725507, -1.03945652, -1.19849684]])
    # 假设每个名字对应data数组的一行
    names == 'Bob'
        array([ True, False, False,  True, False, False, False])
    # 布尔型索引可以应用于数据的筛选
    data[names =='Bob']
        array([[ 1.8450457 ,  1.91791784, -0.90133072, -0.96715706],
               [ 0.0129754 ,  1.92856064,  1.3711845 , -1.17564517]])
    # 布尔型索引应用于修改值
    # 选取所有Joe的行,并且全部赋值为 0
    data[names == 'Joe'] = 0
    data
        array([[ 1.8450457 ,  1.91791784, -0.90133072, -0.96715706],
               [ 0.        ,  0.        ,  0.        ,  0.        ],
               [-0.60143482, -1.41361787,  0.62431237,  0.50040347],
               [ 0.0129754 ,  1.92856064,  1.3711845 , -1.17564517],
               [ 1.43999704, -0.87670553,  0.23952736, -0.64149065],
               [ 0.        ,  0.        ,  0.        ,  0.        ],
               [ 0.        ,  0.        ,  0.        ,  0.        ]])
    # 选取所有Will的行,并且将选取的数据的后两列赋值为0
    data[names == 'Will',2:] = 0 
    data
        array([[ 1.8450457 ,  1.91791784, -0.90133072, -0.96715706],
               [ 0.        ,  0.        ,  0.        ,  0.        ],
               [-0.60143482, -1.41361787,  0.        ,  0.        ],
               [ 0.0129754 ,  1.92856064,  1.3711845 , -1.17564517],
               [ 1.43999704, -0.87670553,  0.        ,  0.        ],
               [ 0.        ,  0.        ,  0.        ,  0.        ],
               [ 0.        ,  0.        ,  0.        ,  0.        ]])
    

    二、绘制散点图

    散点图
    散点图

    三、鸢尾花数据集散点图绘制

    sklearn库封装了很多机器学习算法


    1
    2
    3

    四、机器学习初识

    监督学习(Supervised Learning)、无监督学习(Unsupervised Learning)、半监督学习(Semi-Supervised Learning)、强化学习(Reinforcement Learning)

    • 监督学习(supervised learning)和无监督学习(unsupervised learning)的判断:是否有监督(supervised),就看输入数据是否有标签(label)。输入数据有标签,则为有监督学习,没标签则为无监督学习。
    • 监督学习:回归(Regression,连续)、分类(Classification,离散)
    • 无监督学习:聚类(clustering)
    分类算法KNN:

    K近邻算法,即K-Nearest Neighbor algorithm,简称KNN算法。
    可认为是:找出与待预测样本距离最近的K个邻居,再由这K个邻居的类别投票决定预测结果。
    实例:肿瘤良性、恶性判断(手动实现)

    自己实现KNN算法

    数据集
    import numpy as np
    from matplotlib import pyplot as plt
    from math import sqrt   # 开平方
    from collections import Counter  #collections库非常有用
    # Training samples: two features per row. The first five rows are labelled
    # 0 and the last five are labelled 1 (see raw_data_y below).
    raw_data_X = [
        [3.393533211, 2.331273381],
        [3.110073483, 1.781539638],
        [1.343808831, 3.368360954],
        [3.582294042, 4.679179110],
        [2.280362439, 2.866990263],
        [7.423436942, 4.696522875],
        [5.745051997, 3.533989803],
        [9.172168622, 2.511101045],
        [7.792783481, 3.424088941],
        [7.939820817, 0.791637231],
    ]
    raw_data_y = [0] * 5 + [1] * 5
    # Convert the plain lists to ndarrays so that boolean masks and
    # element-wise arithmetic can be applied to them.
    X_train, y_train = np.array(raw_data_X), np.array(raw_data_y)
    
    对数据进行可视化
    # Scatter the two classes in different colours: label 0 in green,
    # label 1 in red (first feature on the x-axis, second on the y-axis).
    for label, colour in ((0, 'g'), (1, 'r')):
        members = X_train[y_train == label]
        plt.scatter(members[:, 0], members[:, 1], color=colour)
    plt.show()
    
    数据可视化
    预测
    # A new, unlabelled sample arrives; the goal is to decide whether it is
    # malignant or benign.
    x = np.array([8.093607318, 3.365731514])
    # Redraw the training data (label 0 green, label 1 red) and overlay the
    # new sample in blue.
    for label, colour in ((0, 'g'), (1, 'r')):
        members = X_train[y_train == label]
        plt.scatter(members[:, 0], members[:, 1], color=colour)
    plt.scatter(*x, color='b')
    plt.show()
    
    预测
    通过knn算法来预测
    # Compute the Euclidean distance from the new sample x to every training
    # point, then keep the labels of the k closest ones.
    # Equivalent explicit loop, shown for reference:
    #     distances = []
    #     for x_train in X_train:
    #         d = sqrt(np.sum((x_train - x) ** 2))
    #         distances.append(d)
    distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
    distances
    '''
    [4.812566907609877,
     5.229270827235305,
     6.749798999160064,
     4.6986266144110695,
     5.83460014556857,
     1.4900114024329525,
     2.354574897431513,
     1.3761132675144652,
     0.3064319992975,
     2.5786840957478887]
    '''
    # argsort returns the indices of the training points ordered from the
    # smallest distance to the largest.
    nearst = np.argsort(distances)
    nearst
    # array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)
    # Suppose we specify k = 6 neighbours.
    k = 6
    # Slice with k rather than a literal 6 so that changing k above actually
    # changes how many neighbours take part in the vote.
    top_k_y = [y_train[i] for i in nearst[:k]]
    top_k_y
    # [1, 1, 1, 1, 1, 0]
    
    用Counter统计投票结果(数据量较大时,这种统计方式比手动计数更方便)
    # Tally how many of the k nearest neighbours carry each label.
    votes = Counter(top_k_y)
    votes
    # Counter({1: 5, 0: 1})
    # most_common(n) returns the n most frequent labels as (label, count)
    # pairs, most frequent first.
    votes.most_common(1)
    # The predicted label is the label part of the single most common pair.
    predict_y = votes.most_common(1)[0][0]
    predict_y
    # 1
    # Conclusion: x is predicted as label 1, i.e. malignant.
    

    相关文章

      网友评论

        本文标题:python第十天

        本文链接:https://www.haomeiwen.com/subject/cqsqbctx.html