线性回归代码-python

作者: 橙子430 | 来源:发表于2019-01-26 13:29 被阅读1次

    线性回归模型

    其中包括5个方法

    1、最小二乘法调用numpy包实现

    2、最小二乘法调用scipy包实现

    3、自己编写最小二乘法实现

    4、线性回归模型调用sklearn包实现

    5、自己编写线性回归方法实现

    示例结果

    程序运行结果
    程序运行结果

    代码

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.optimize import leastsq  # 方法二中使用
    from sklearn import linear_model
    
    
    # Read the comma-separated dataset into a NumPy array, then report its
    # shape and its Python type. ``data`` stays a module-level name because
    # the rest of the script reads it.
    data = np.loadtxt('challenge_dataset.txt', delimiter=',')
    for summary in ('data.shape: {0}'.format(data.shape),
                    'data.type : {0}'.format(type(data))):
        print(summary)
    
    
    # 线性回归模型类
    class Linear_regression_methods:
        """Fit a straight line ``y = a * x + b`` to sample data in five ways.

        Column 0 of ``data`` is used as ``x`` and column 1 as ``y``.  Every
        public fitting method estimates the slope and the intercept, prints the
        fitted equation, shows a matplotlib plot of the samples with the line,
        and prints the mean squared error of the fit.
        """

        def __init__(self, data):
            """Store ``data`` and split its first two columns into x and y."""
            self.data = data
            self.x = data[:, 0]
            self.y = data[:, 1]

        def plt_method(self, title, a, b):
            """Plot the samples and the line ``a * x + b`` under ``title``.

            ``title`` is the plot title string, ``a`` the weight (slope) and
            ``b`` the bias (intercept).  Blocks on ``plt.show()``.
            """
            plt.title(title)
            plt.plot(self.x, self.y, 'o', label='data', markersize=10)
            plt.plot(self.x, a * self.x + b, 'r', label='line')
            plt.legend()
            plt.show()

        def print_method(self, title, a, b):
            """Print a 50-dash separator, ``title`` and the fitted equation."""
            print('-' * 50 + "\n{}\ny = {:.5f}x + {:.5f}".format(title, a, b))

        def computer_error(self, a, b):
            """Print the mean squared error of ``y = a * x + b`` on the data."""
            x = self.data[:, 0]
            y = self.data[:, 1]
            squared_residuals = (y - (a * x + b)) ** 2
            # Normalise by this instance's own sample count, not by a
            # module-level ``data`` variable that may not exist or may differ.
            mse = np.sum(squared_residuals, axis=0) / float(len(self.data))
            print('this model final error: {:.5f}'.format(mse))

        def _report(self, title, a, b):
            """Print the fitted line, plot it, then print its error."""
            self.print_method(title, a, b)
            self.plt_method(title, a, b)
            self.computer_error(a, b)

        def one_leastsq_call_numpy_pakeage(self):
            """Fit with ``numpy.linalg.lstsq`` on the design matrix ``[x, 1]``."""
            design = np.vstack([self.x, np.ones(len(self.x))]).T
            # Passing rcond=None explicitly selects NumPy's current default
            # cutoff and avoids the FutureWarning that NumPy 1.14+ emits when
            # the argument is omitted.
            a, b = np.linalg.lstsq(design, self.y, rcond=None)[0]
            self._report('first leastsq_call_numpy_pakeage', a, b)

        def two_leatsq_call_scipy_pakeage(self):
            """Fit with ``scipy.optimize.leastsq`` starting from ``y = x + 1``."""

            def residuals(params, x, y):
                # Difference between the candidate line and the observations.
                slope, intercept = params
                return slope * x + intercept - y

            # The starting point is arbitrary; it only influences how long the
            # optimiser needs to reach the solution.
            initial_guess = [1, 1]
            solution, _ = leastsq(residuals, initial_guess,
                                  args=(self.x, self.y))
            a, b = solution
            self._report('second leatsq_call_scipy_pakeage', a, b)

        def three_leastsq_function(self):
            """Fit with the closed-form least-squares formulas, written by hand."""
            x, y = self.x, self.y
            n = len(x)
            sum_x = np.sum(x)
            sum_y = np.sum(y)
            sum_xx = np.sum(x * x)
            sum_xy = np.sum(x * y)
            # slope = (Sxy - Sx*Sy/n) / (Sxx - Sx^2/n); intercept follows from
            # the means of x and y.
            a = (sum_xy - sum_x * sum_y / n) / (sum_xx - sum_x * sum_x / n)
            b = sum_y / n - a * sum_x / n
            self._report('third leastsq_function', a, b)

        def four_linear_model_call_sklearn(self):
            """Fit with ``sklearn.linear_model.LinearRegression``."""
            body_reg = linear_model.LinearRegression()
            # sklearn expects a 2-D feature matrix; with a 1-D target,
            # ``coef_`` is 1-D and ``intercept_`` is a scalar.
            body_reg.fit(self.x.reshape(-1, 1), self.y)
            # Read the fitted parameters directly rather than reconstructing
            # them from two predictions, which divides by zero whenever the
            # first two x values coincide.
            a = float(body_reg.coef_[0])
            b = float(body_reg.intercept_)
            self._report('fourth linear_model_call_sklearn', a, b)

        def five_linear_regression(self, learning_rate=0.001, num_iter=1000,
                                   initial_b=0.0, initial_m=0.0):
            """Fit ``y = m * x + b`` by batch gradient descent on the MSE.

            Args:
                learning_rate: step size applied to each gradient.
                num_iter: number of gradient steps; no convergence check is
                    performed.
                initial_b: starting intercept.
                initial_m: starting slope.
            """
            x = self.data[:, 0]
            y = self.data[:, 1]
            n = float(len(self.data))
            b, m = initial_b, initial_m
            for _ in range(num_iter):
                residual = y - (m * x + b)
                # Partial derivatives of the mean squared error; both are
                # computed before either parameter is updated.
                b_gradient = -(2 / n) * np.sum(residual, axis=0)
                m_gradient = -(2 / n) * np.sum(x * residual, axis=0)
                b = b - learning_rate * b_gradient
                m = m - learning_rate * m_gradient
            self._report('five_linear_regression', m, b)
    
    
    # Build the model once, then run the five fitting methods in order; each
    # one prints its fitted line, shows a plot and prints its error.
    model = Linear_regression_methods(data)
    for fit_and_report in (
        model.one_leastsq_call_numpy_pakeage,
        model.two_leatsq_call_scipy_pakeage,
        model.three_leastsq_function,
        model.four_linear_model_call_sklearn,
        model.five_linear_regression,
    ):
        fit_and_report()
    

    相关文章

      网友评论

        本文标题:线性回归代码-python

        本文链接:https://www.haomeiwen.com/subject/fwlijqtx.html