SVM

作者: 馒头and花卷 | 来源:发表于2019-08-20 17:00 被阅读0次

    支持向量机

    本节将依SMO算法训练线性SVM, 核方法的使用可以很方便的进行扩展.

    import numpy as np
    import matplotlib.pyplot as plt
    

    下面的Point的类中的:
    g(x) = \sum_{i=1}^N \alpha_i y_i k(x_i, x) + b,
    e = g - y, yg = y * g

    class Point:
        """
        A single training sample together with the per-sample state that
        the SMO algorithm repeatedly reads and updates (alpha, g, E, and
        the cached KKT flag).
        """
        def __init__(self, x, y, k, order,
                     g, alpha, c):
            assert y == 1 or y == -1, "Invalid y"
            self.x = x          # feature vector
            self.y = y          # class label, +1 or -1
            self.order = order  # row/column index into the Gram matrix
            self.k = k          # shared Gram (inner-product) matrix
            self.c = c          # box bound C on alpha
            self.g = g          # current decision value g(x)
            self.alpha = alpha  # dual variable
            self.e = g - y      # prediction error E = g - y
            self.yg = g * y     # y * g(x), the quantity tested by KKT

        def kkt(self, epsilon=0.001):
            """
            Check this sample against the KKT conditions of the dual.

            :param epsilon: tolerance allowed in the comparisons
            :return: 0 when the conditions hold, 1 otherwise (an int so
                     callers can simply sum the number of violators)
            """
            margin = self.y * self.g
            lo = 1 - epsilon
            hi = 1 + epsilon
            if self.alpha == 0. and margin < lo:
                ok = False
            elif 0 < self.alpha < self.c and not (lo <= margin <= hi):
                ok = False
            elif self.alpha == self.c and margin > hi:
                ok = False
            else:
                ok = True
            self.satisfy_kkt = ok
            return 0 if ok else 1

        def update_g_e(self, dog1, dog2, oldb, newb, newalpha1, newalpha2):
            """
            Refresh g, e and yg after the pair (dog1, dog2) was optimised.
            Must be called while dog1.alpha / dog2.alpha still hold the
            OLD values and newalpha1 / newalpha2 carry the new ones.
            """
            k1 = self.k[self.order, dog1.order]
            k2 = self.k[self.order, dog2.order]
            delta1 = (newalpha1 - dog1.alpha) * dog1.y * k1
            delta2 = (newalpha2 - dog2.alpha) * dog2.y * k2
            self.g = self.g + delta1 + delta2 + newb - oldb
            self.e = self.g - self.y
            self.yg = self.g * self.y
    
    

    下面是Points类的定义, 用于训练和预测. 值得一提的是在initial函数中对于alpha的初始化,从[0, self.c / self.size]中依照均匀分布选取,并且对最后一个样本进行特别处理,以保证\sum_i \alpha_i y_i=0, 这意味着alpha将是一个可行解. 一些文章中似乎是将alpha初始化为0的,不知道按照这种方式怎么进行更新迭代,因为:
    \alpha_2^{new, unc} = \alpha_2^{old} + \frac{y_2(E_1-E_2)}{\eta},
    \alpha^{old} = 0, b = 0, \Rightarrow E=0 \Rightarrow \alpha^{new}=0. 意味着样本的参数\alpha是不会变化的.

    另外calc_func_value,计算的是:
    \frac{1}{2} \sum_{i=1}^N\sum_{j=1}^N \alpha_i \alpha_j y_i y_j k(x_i, x_j) - \sum_{i=1}^N \alpha_i
    这个值越小收敛的越好

    class Points:
        """
        The whole training set: builds the Gram matrix, initialises a
        feasible dual vector alpha, and trains an SVM via SMO.
        """
        def __init__(self, points, labels, c=1.):
            # points: sample vectors; labels: +1/-1 per sample;
            # c: regularisation constant (box bound on each alpha).
            self.points = []  # holds the Point objects
            self.size = len(points)
            self.initial(points, labels, c=c)


        def distance(self, point1, point2):
            """
            Inner product of two samples.  Substituting a kernel
            function here turns the model into a kernel SVM.
            """
            return point1 @ point2

        def initial(self, points, labels, b=0., c=1.):
            """
            Initialise the model state.

            b: intercept
            c: regularisation coefficient
            alpha is drawn uniformly from [0, c/size]; the last entry is
            then shifted so that sum_i alpha_i * y_i == 0 (a feasible
            point of the dual), redrawing until that entry also lies in
            (0, c).
            :return:
            """
            self.b =  b
            self.c = c

            # draw a random feasible alpha
            alpha = np.random.uniform(0, self.c / self.size, size=self.size)
            y = labels
            the_sum = np.sum(y * alpha)
            # shift the last entry so the equality constraint holds exactly
            alpha[-1] = alpha[-1] - the_sum * y[-1]
            while not (0 < alpha[-1] < self.c):
                alpha = np.random.uniform(0, self.c / self.size, size=self.size)
                the_sum = np.sum(y * alpha)
                alpha[-1] = alpha[-1] - the_sum * y[-1]

            alphay = alpha * y
            # Gram matrix of pairwise inner products (symmetric).
            k = np.zeros((self.size, self.size))
            for i in range(self.size):
                for j in range(i, self.size):
                    k[i, j] = self.distance(points[i], points[j])
                    k[j, i] = self.distance(points[j], points[i])
            self.k = k # inner-product (Gram) matrix
            # initial decision values g(x_i) for every sample
            g = k @ alphay + self.b
            for i in range(self.size):
                point = Point(points[i], labels[i], k, i,
                              g[i], alpha[i], c)
                self.points.append(point)


        def find_dog1(self, points):
            """
            Pick the first working-set point for SMO.
            Returns None when no KKT violator exists in `points`.
            """
            lucky_dog1 = None
            lucky_value1 = 99999999999.  # sentinel larger than any |yg - 1|
            dogs = points.copy()
            # First look among support vectors (alpha > 0) violating KKT.
            for dog in dogs:
                if not dog.satisfy_kkt and dog.alpha > 0:
                    cond =np.abs(dog.yg - 1)
                    if cond < lucky_value1:
                        lucky_dog1 = dog
                        lucky_value1 = cond

            # Otherwise scan the whole set for the worst violator
            # (smallest y*g, i.e. deepest on the wrong side).
            if lucky_dog1 is None:
                for dog in dogs:
                    if not dog.satisfy_kkt:
                        if dog.yg < lucky_value1:
                            lucky_dog1 = dog
                            lucky_value1 = dog.yg
            return lucky_dog1

        def find_dog2(self, points, dog1):
            """
            Pick the second working-set point: the one maximising |E2 - E1|,
            which heuristically gives the largest step.
            """
            dogs = points.copy()
            e1 = dog1.e
            lucky_dog2 = None
            lucky_value2 = -1.
            # maximise |e2 - e1|
            for dog in dogs:
                e2 = dog.e
                value = np.abs(e2 - e1)
                if value > lucky_value2:
                    lucky_dog2 = dog
                    lucky_value2 = value
            return lucky_dog2


        def calc_l_h(self, dogs):
            """
            Compute the clipping bounds L, H for alpha2 so the pair stays
            inside the box [0, C] on its equality-constraint line.
            :param dogs: (dog1, dog2) pair being optimised
            :return: (l, h)
            """
            dog1 = dogs[0]
            dog2 = dogs[1]
            if dog1.y != dog2.y:
                l = max(0, dog2.alpha-dog1.alpha)
                h = min(self.c, self.c + dog2.alpha-dog1.alpha)
            else:
                l = max(0, dog2.alpha+dog1.alpha-self.c)
                h = min(self.c, dog2.alpha+dog1.alpha)
            return (l, h)

        def update(self, dogs, l, h):
            """
            Perform one SMO step: compute the new alpha1, alpha2 and b,
            refresh every point's g and e, then commit the new values.
            """
            dog1 = dogs[0]
            dog2 = dogs[1]
            e1 = dog1.e
            e2 = dog2.e
            k11 = self.k[dog1.order, dog1.order]
            k12 = self.k[dog1.order, dog2.order]
            k21 = self.k[dog2.order, dog1.order]
            k22 = self.k[dog2.order, dog2.order]
            # NOTE(review): eta can be 0 for duplicate/parallel samples,
            # which divides by zero below; standard SMO guards eta <= 0.
            eta =  k11 + k22 - 2 * k12
            alpha2_unc = dog2.alpha + dog2.y * (e1 - e2) / eta  # unclipped alpha2
            if alpha2_unc < l:
                alpha2 = l
            elif l < alpha2_unc < h:
                alpha2 = alpha2_unc
            else:
                alpha2 = h
            alpha1 = dog1.alpha + dog1.y * dog2.y * (dog2.alpha - alpha2)
            b1 = -e1 - dog1.y * k11 * (alpha1 - dog1.alpha) - \
                 dog2.y * k21 * (alpha2 - dog2.alpha) + self.b
            b2 = -e2 - dog1.y * k12 * (alpha1 - dog1.alpha) - \
                 dog2.y * k22 * (alpha2 - dog2.alpha) + self.b
            b = (b1 + b2) / 2
            # Refresh g, e of all points BEFORE overwriting the old
            # alphas/b: update_g_e reads dog1.alpha / dog2.alpha as the
            # OLD values.
            for point in self.points:
                point.update_g_e(dog1, dog2, self.b, b, alpha1, alpha2)

            # now commit alpha1, alpha2, b
            dog1.alpha = alpha1
            dog2.alpha = alpha2
            self.b = b
            return (alpha1, alpha2, b)

        def update_kkt(self, epsilon):
            """
            Re-check the KKT conditions for every point and return the
            number of violators.
            """
            count = 0
            for point in self.points:
                count += point.kkt(epsilon)
            return count

        def calc_func_value(self):
            """
            Dual-objective-style value used to monitor convergence
            (smaller is better).
            NOTE(review): the textbook dual objective is
            (1/2) * a^T Q a - sum(alpha); the 1/2 on the quadratic term
            is missing here.  This is harmless for the equality-based
            stall check in smo(), but the reported value is not the true
            objective.
            """
            alpha = np.array([point.alpha for point in self.points])
            y = np.array([point.y for point in self.points])
            alphay = alpha * y
            value = alphay @ self.k @ alphay - np.sum(alpha)
            return value

        def calc_w(self):
            """
            Compute the primal weight vector w = sum_i alpha_i y_i x_i
            (meaningful only for the linear kernel).
            """
            w = 0.0
            for point in self.points:
                w += point.alpha * point.y * point.x
            self.w = w
            return w


        def smo(self, max_iter, x, y1, y2, epsilon=1e-3, enough_num=10):
            """
            Run SMO training and return (w, b).

            x, y1, y2 are only needed by the (disabled) visualisation
            code, not by training itself; w is useful only for a linear
            classifier.
            enough_num: stop once fewer than enough_num points violate
            the KKT conditions.
            """
            iteration = 0
            value_pre = None
            points1 = self.points.copy()
            points2 = self.points.copy()
            while True:
                iteration += 1
                count = self.update_kkt(epsilon)
                value = self.calc_func_value()
                #print(iteration, value, count)
                if count < enough_num or iteration > max_iter:
                    break

                # If the chosen pair did not decrease the objective, try
                # other second points; once exhausted, drop the first
                # point too and reselect both from the remaining pool.
                # NOTE(review): find_dog1 can return None when the pool
                # holds no violator; points2.remove(dog1) would then
                # raise -- confirm the count check above always exits
                # first.
                if value_pre == value:
                    points2.remove(dog2)
                    if len(points2) > 1:
                        dog2 = self.find_dog2(points2, dog1)
                    else:
                        points1.remove(dog1)
                        points2 = points1.copy()
                        dog1 = self.find_dog1(points1)
                        points2.remove(dog1)
                        dog2 = self.find_dog2(points2, dog1)
                else:
                    points1 = self.points.copy()
                    points2 = self.points.copy()
                    dog1 = self.find_dog1(points1)
                    points2.remove(dog1)
                    dog2 = self.find_dog2(points2, dog1)

                value_pre = value
                l, h = self.calc_l_h((dog1, dog2))
                self.update((dog1, dog2), l, h)

                # The string below is disabled visualisation code.
                # NOTE(review): its intercept term (- self.b) looks like
                # it should be - self.b / self.w[1]; verify before
                # re-enabling.
                """
                可视化部分
                if iteration % 10 == 9:
                    self.calc_w()
                    plt.cla()
                    plt.scatter(x, y1)
                    plt.scatter(x, y2)
                    y = -self.w[0] / self.w[1] * x - self.b
                    plt.title("SVM")
                    plt.plot(x, y, color="red")
                    plt.pause(0.01)
                """
            self.calc_w()

            return self.w, self.b

        def pre(self, x):
            """
            Predict the class (+1/-1) of sample x.  For a linear
            classifier it would suffice to compute self.w @ x + self.b;
            the kernel-style sum below is kept for easy extension.
            """
            value = self.b
            for point in self.points:
                value += point.alpha * point.y * \
                        self.distance(x, point.x)
            sign = 1 if value >= 0 else -1
            return sign
    

    准备数据

    蓝色部分:
    y_1 = 2x + 5 + \epsilon_1, \quad \epsilon_1 \sim \mathcal{N}(0, \sigma^2).
    红色部分:
    y_2 = 2x - 5 + \epsilon_2, \quad \epsilon_2 \sim \mathcal{N}(0, \sigma^2)

    # Two noisy, linearly separable clouds around y = 2x + 5 and y = 2x - 5.
    np.random.seed(10086)
    x = np.linspace(0, 10, 100)
    e1 = np.random.randn(100) * 2
    e2 = np.random.randn(100) * 2
    y1 = 2 * x + 5 + e1  # upper (blue) cloud, label +1
    y2 = 2 * x - 5 + e2  # lower (red) cloud, label -1
    data1 = np.column_stack((x, y1))
    data2 = np.column_stack((x, y2))
    data = np.vstack((data1, data2))
    labels = np.hstack((np.ones(100), -np.ones(100)))
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    plt.show()
    
    在这里插入图片描述

    测试

    # Train with C = 10 and plot the learned separating line.
    test = Points(data, labels, c=10)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    # Decision boundary w.x + b = 0  =>  x2 = -(w0*x1 + b) / w1.
    # (The original plotted -w[0]/w[1]*x - b, forgetting to divide the
    # intercept b by w[1].)
    y = -(w[0] * x + b) / w[1]
    plt.plot(x, y, color="red")
    plt.show()
    
    [-1.25243632  0.5887332 ] -1.1498779874793863
    
    在这里插入图片描述

    有些时候结果是不会那么好的, 在收敛过程中还是会有波动的, 而且数据越是混乱, 波动越大

    test.pre(np.array([10., 0.]))  # predict the class of a new sample
    
    -1
    
    test.pre(np.array([0., 10.]))  # predict the class of a new sample
    
    1
    
    # Train with C = 1 and plot the learned separating line.
    test = Points(data, labels, c=1)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    # Decision boundary w.x + b = 0  =>  x2 = -(w0*x1 + b) / w1.
    # (The original plotted -w[0]/w[1]*x - b, forgetting to divide the
    # intercept b by w[1].)
    y = -(w[0] * x + b) / w[1]
    plt.plot(x, y, color="red")
    plt.show()
    
    [-1.33721113  0.83955356] -1.6680372246340474
    
    在这里插入图片描述
    # Train with C = 0.1 and plot the learned separating line.
    test = Points(data, labels, c=0.1)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    # Decision boundary w.x + b = 0  =>  x2 = -(w0*x1 + b) / w1.
    # (The original plotted -w[0]/w[1]*x - b, forgetting to divide the
    # intercept b by w[1].)
    y = -(w[0] * x + b) / w[1]
    plt.plot(x, y, color="red")
    plt.show()
    
    [-0.78780019  0.44522279] -0.6061702858422205
    
    在这里插入图片描述

    实验中发现, c越大波动越大,或许在训练过程可以动态调整c

    相关文章

      网友评论

          本文标题:SVM

          本文链接:https://www.haomeiwen.com/subject/muopsctx.html