支持向量机python实现(简易版)

作者: 牛顿学计算机 | 来源:发表于2018-09-30 09:32 被阅读33次

    支持向量机的理论这里就不介绍了,可参考《统计学习方法》这本书,书上已经把整个过程写得很详细。如果觉得看书太枯燥,花费时间多,可以参考这边博文https://blog.csdn.net/v_JULY_v/article/details/7624837,写得非常好,通俗易懂。下面,是我参考《机器学习实战》这本书上代码写得支持向量机实现代码。

    from numpy import *
    import numpy as np
    import matplotlib.pyplot as plt
    
    def load_set_data(file_name):
        data_mat = []
        label_mat = []
        n = 0
    
        fr = open(file_name)
        for line in fr.readlines():
            line_arr = line.strip().split("\t")
            data_mat.append([float(line_arr[0]), float(line_arr[1])])
            label_mat.append(float(line_arr[2]))
            n += 1
    
        return data_mat, label_mat, n
    
    def select_j_rand(i, m):
        j = i
        while (j == i):
            j = int(random.uniform(0, m))
    
        return j
    
    def clip_alpha(aj, H, L):
        if aj > H:
            aj = H
        if aj < L:
            aj = L
    
        return aj
    
    def smo_simple(data_matin, class_labele, C, toler, max_iter):
        data_matrix = mat(data_matin)  #将输入列表转成矩阵
        label_mat = mat(class_labele).transpose()  #将训练数据转成列向量
        b = 0
        m,n = shape(data_matrix)
        alphas = mat(zeros((m, 1)))
        iter = 0
    while(iter < max_iter):
            alpha_pirs_change = 0
            for i in range(m):
                fxi = float(multiply(alphas, label_mat).T * (data_matrix * (data_matrix[i, :].T))) + b
                ei = fxi - float(label_mat[i])
                #if ((label_mat[i] * ei < -toler) and (alphas[i] < C)) or \
                #    ((label_mat[i] * ei > toler) and \
                #    (alphas[i] > 0)):
                if((label_mat[i] * (fxi - 2 * b) <= 1 and alphas[i] < C)\
                    or (label_mat[i] * (fxi - 2 * b) >= 1 and alphas[i] > 0)\
                    or (label_mat[i] * (fxi - 2 * b) == 1 and (alphas[i] == 0 or alphas[i] == C))):
                    j = select_j_rand(i, m)  #随机选择aj且i != j,相当于随机选择ai和aj
                    fxj = float(multiply(alphas, label_mat).T * \
                        (data_matrix * data_matrix[j, :].T)) + b
                    ej = fxj - float(label_mat[j])
                    alpha_iold = alphas[i].copy()  #保存alphai更新前的值
                    alpha_jold = alphas[j].copy()  #保存alphaj更新前的值
                    #求解alphaj的上下边界
                    if (label_mat[i] != label_mat[j]):
                        L = max(0, alpha_jold - alpha_iold)
                        H = min(C, C + alpha_jold + alpha_iold)
                   else:
                        L = max(0, alpha_jold + alpha_iold - C)
                        H = min(C, alpha_iold + alpha_jold)
                    if (L == H):
                        print("L == H")
                        continue
                   eta = 2 * data_matrix[i, :] * data_matrix[j, :].T \
                        - data_matrix[i, :] * data_matrix[i, :].T - data_matrix[j, :] * data_matrix[j, :].T
                    if (eta > 0):
                        print("eta > 0")
                        continue
                    alphas[j] = alpha_jold - (label_mat[j] * (ei - ej) * 1.0 / eta)
                    alphas[j] = clip_alpha(alphas[j], H, L)
                    if (abs(alphas[j] - alpha_jold) < 0.00001):  
                        print("j not moving enough")
                        continue
                    alphas[i] = alpha_iold + (label_mat[i] * label_mat[j] * (alpha_jold - alphas[j]))
                    b1 = b - ei - label_mat[i] * (alphas[i] - alpha_iold) * (data_matrix[i, :] * data_matrix[i, :].T)\
                        - label_mat[j] * (alphas[j] - alpha_jold) * (data_matrix[i, :] * data_matrix[j, :].T)
                    b2 = b - ej - label_mat[i] * (alphas[i] - alpha_iold) * (data_matrix[i, :] * data_matrix[j, :].T)\
                        - label_mat[j] * (alphas[j] - alpha_jold) * (data_matrix[j, :] * data_matrix[j, :].T) 
                    if (alphas[i] > 0 and alphas[i] < C):
                        b = b1
                    elif (alphas[j] > 0 and alphas[j] < C):
                        b = b2
                    else:
                        b = (b1 + b2) / 2.0
                    alpha_pirs_change += 1
                    print("iter: %d i: %d, paris changed % d" % (iter, i, alpha_pirs_change))
            if (alpha_pirs_change == 0):
                iter += 1
            else:
                iter = 0
            print("iteration number: %d" % iter)
        return b,alphas
    
    def show_experiment_plot(alphas, data_list_in, label_list_in, b, n):
        data_arr_in = array(data_list_in)
        label_arr_in = array(label_list_in)
        alphas_arr = alphas.getA()
        data_mat = mat(data_list_in)
        label_mat = mat(label_list_in).transpose()
    
        i = 0
        weights = zeros((2, 1))
        while(i < n):
            if(label_arr_in[i] == -1):
                plt.plot(data_arr_in[i, 0], data_arr_in[i, 1], "ob")
            elif(label_arr_in[i] == 1):
                plt.plot(data_arr_in[i, 0], data_arr_in[i, 1], "or")
            if(alphas_arr[i] > 0):
                plt.plot(data_arr_in[i, 0], data_arr_in[i, 1], "oy")
                weights += multiply(alphas[i] * label_mat[i], data_mat[i, :].T)
            i += 1
    
        x = arange(-2, 12, 0.1)
        y = []
        for k in x:
            y.append(float(-b - weights[0] * k) / weights[1])
    
        plt.plot(x, y, '-g')
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.show()
    
    def main():
        data_list,label_list, n = load_set_data("test_set.txt")
        b,alphas = smo_simple(data_list, label_list, 0.6, 0.001, 40)
        b_data = array(b)[0][0]
        show_experiment_plot(alphas, data_list, label_list, b_data, n)
    
    main()
    

    代码中被注释的部分是《机器学习实战》这本书的写法,看了很久还是没有搞明白作者为什么这么写,如果哪位大神知道,还请说一下,大家好互相学习。根据KKT条件我自己重现实现了更新alphas的条件判断。
    特征到结果的输出函数:
    ui = w.x - b
    其中w,x和ui均为向量


    2.PNG

    也就是说,当没有满足KKT条件时,则需要更新alphas的值。
    实验数据:

    3.542485    1.977398    -1
    3.018896    2.556416    -1
    7.551510    -1.580030   1
    2.114999    -0.004466   -1
    8.127113    1.274372    1
    7.108772    -0.986906   1
    8.610639    2.046708    1
    2.326297    0.265213    -1
    3.634009    1.730537    -1
    0.341367    -0.894998   -1
    3.125951    0.293251    -1
    2.123252    -0.783563   -1
    0.887835    -2.797792   -1
    7.139979    -2.329896   1
    1.696414    -1.212496   -1
    8.117032    0.623493    1
    8.497162    -0.266649   1
    4.658191    3.507396    -1
    8.197181    1.545132    1
    1.208047    0.213100    -1
    1.928486    -0.321870   -1
    2.175808    -0.014527   -1
    7.886608    0.461755    1
    3.223038    -0.552392   -1
    3.628502    2.190585    -1
    7.407860    -0.121961   1
    7.286357    0.251077    1
    2.301095    -0.533988   -1
    -0.232542   -0.547690   -1
    3.457096    -0.082216   -1
    3.023938    -0.057392   -1
    8.015003    0.885325    1
    8.991748    0.923154    1
    7.916831    -1.781735   1
    7.616862    -0.217958   1
    2.450939    0.744967    -1
    7.270337    -2.507834   1
    1.749721    -0.961902   -1
    1.803111    -0.176349   -1
    8.804461    3.044301    1
    1.231257    -0.568573   -1
    2.074915    1.410550    -1
    -0.743036   -1.736103   -1
    3.536555    3.964960    -1
    8.410143    0.025606    1
    7.382988    -0.478764   1
    6.960661    -0.245353   1
    8.234460    0.701868    1
    8.168618    -0.903835   1
    1.534187    -0.622492   -1
    9.229518    2.066088    1
    7.886242    0.191813    1
    2.893743    -1.643468   -1
    1.870457    -1.040420   -1
    5.286862    -2.358286   1
    6.080573    0.418886    1
    2.544314    1.714165    -1
    6.016004    -3.753712   1
    0.926310    -0.564359   -1
    0.870296    -0.109952   -1
    2.369345    1.375695    -1
    1.363782    -0.254082   -1
    7.279460    -0.189572   1
    1.896005    0.515080    -1
    8.102154    -0.603875   1
    2.529893    0.662657    -1
    1.963874    -0.365233   -1
    8.132048    0.785914    1
    8.245938    0.372366    1
    6.543888    0.433164    1
    -0.236713   -5.766721   -1
    8.112593    0.295839    1
    9.803425    1.495167    1
    1.497407    -0.552916   -1
    1.336267    -1.632889   -1
    9.205805    -0.586480   1
    1.966279    -1.840439   -1
    8.398012    1.584918    1
    7.239953    -1.764292   1
    7.556201    0.241185    1
    9.015509    0.345019    1
    8.266085    -0.230977   1
    8.545620    2.788799    1
    9.295969    1.346332    1
    2.404234    0.570278    -1
    2.037772    0.021919    -1
    1.727631    -0.453143   -1
    1.979395    -0.050773   -1
    8.092288    -1.372433   1
    1.667645    0.239204    -1
    9.854303    1.365116    1
    7.921057    -1.327587   1
    8.500757    1.492372    1
    1.339746    -0.291183   -1
    3.107511    0.758367    -1
    2.609525    0.902979    -1
    3.263585    1.367898    -1
    2.912122    -0.202359   -1
    1.731786    0.589096    -1
    2.387003    1.573131    -1
    

    实验结果如下:


    1.PNG

    图中黄色的点是支持向量的(alphas[i] > 0对应的(xi, yi))。实验结果表明。该算法能正确分类,效果良好。

    相关文章

      网友评论

        本文标题:支持向量机python实现(简易版)

        本文链接:https://www.haomeiwen.com/subject/cxtboftx.html