08 线性回归

作者: fat32jin | 来源:发表于2019-01-17 14:21 被阅读0次

    对于单变量线性回归而言,在误差服从正态分布的情况下,从几何意义出发的最小二乘法与从概率意义出发的最大似然估计是等价的。


    几何意义推导:


    $1 一元线性回归:

    y = ax + b

    图1  最小二乘法:使各样本点到直线的竖直距离(残差)的平方和最小

    手工计算例子:

    $2  多元线性回归:


    机器学习实战的推导:


    $3 代码实例(机器学习实战):  

    from numpy import *

    def loadDataSet(fileName):
        """Load a tab-separated file of floats into feature rows and labels.

        The number of feature columns is taken from the first line of the file
        (field count minus one); the last field of every line is the label.

        Args:
            fileName: Path of the tab-delimited text file to read.

        Returns:
            A tuple ``(dataMat, labelMat)``: a list of per-row feature lists
            and a list of labels, all converted to float.

        Raises:
            ValueError: If a field cannot be parsed as a float.
            IndexError: If a row has fewer feature fields than the first line.
        """
        dataMat = []; labelMat = []
        numFeat = None
        # A single pass inside `with` guarantees the handle is closed.
        with open(fileName) as fr:
            for line in fr:
                if numFeat is None:
                    # Feature count comes from the raw first line.
                    numFeat = len(line.split('\t')) - 1
                stripped = line.strip()
                if not stripped:
                    # Blank (e.g. trailing) lines carry no data; skip them
                    # instead of failing on float('').
                    continue
                curLine = stripped.split('\t')
                dataMat.append([float(curLine[i]) for i in range(numFeat)])
                labelMat.append(float(curLine[-1]))
        return dataMat,labelMat

    #标准线性回归方法

    def standRegres(xArr,yArr):
        """Fit ordinary least squares with the normal equations.

        Computes ``ws = (X^T X)^-1 X^T y``.

        Args:
            xArr: 2-D array-like, one sample per row.
            yArr: 1-D array-like of target values, one per row of ``xArr``.

        Returns:
            Column matrix of regression weights, or None (after printing a
            message) when the determinant of ``X^T X`` is exactly zero.
        """
        # asmatrix is the function the removed numpy `mat` alias pointed to.
        xMat = asmatrix(xArr); yMat = asmatrix(yArr).T
        xTx = xMat.T * xMat
        # NOTE: an exact comparison only catches exactly singular matrices;
        # nearly singular ones still go through the inverse below.
        if linalg.det(xTx) == 0.0:
            print("This matrix is singular, cannot do inverse")
            return None
        return xTx.I * (xMat.T * yMat)

    #局部加权线性回归方法 (LWLR)

    def lwlr(testPoint,xArr,yArr,k=1.0):
        """Predict one point with locally weighted linear regression.

        Every training row j gets the weight
        ``exp(-|testPoint - x_j|^2 / (2 * k**2))``, and the weighted normal
        equations ``(X^T W X)^-1 X^T W y`` are solved for that point.

        Args:
            testPoint: Feature row to predict for.
            xArr: 2-D array-like of training samples, one per row.
            yArr: 1-D array-like of training targets.
            k: Kernel width; smaller values weight nearby rows more heavily.

        Returns:
            ``testPoint * ws`` (the prediction), or None (after printing a
            message) when the determinant of ``X^T W X`` is exactly zero.
        """
        # asmatrix is the function the removed numpy `mat` alias pointed to.
        xMat = asmatrix(xArr); yMat = asmatrix(yArr).T
        m = shape(xMat)[0]
        weights = asmatrix(eye(m))
        for j in range(m):
            diffMat = testPoint - xMat[j,:]
            # Take the scalar out of the 1x1 product; assigning a size-1
            # matrix to a single element is deprecated in NumPy.
            sqDist = (diffMat * diffMat.T)[0, 0]
            weights[j,j] = exp(sqDist/(-2.0*k**2))
        xTx = xMat.T * (weights * xMat)
        # NOTE: exact comparison; with a very small k all weights can
        # underflow to zero, which lands here.
        if linalg.det(xTx) == 0.0:
            print("This matrix is singular, cannot do inverse")
            return None
        ws = xTx.I * (xMat.T * (weights * yMat))
        return testPoint * ws

    #加权回归测试

    def lwlrTest(testArr,xArr,yArr,k=1.0):
        """Apply lwlr to every row of ``testArr``.

        Args:
            testArr: Sequence of feature rows to predict for.
            xArr: Training samples passed through to lwlr.
            yArr: Training targets passed through to lwlr.
            k: Kernel width passed through to lwlr.

        Returns:
            1-D float array with one prediction per row of ``testArr``; an
            entry is NaN when lwlr reported a singular system (returned None).
        """
        m = shape(testArr)[0]
        yHat = zeros(m)
        for i in range(m):
            pred = lwlr(testArr[i],xArr,yArr,k)
            # lwlr yields a size-1 matrix; store its scalar explicitly, since
            # assigning a size-1 array to one element is deprecated in NumPy.
            yHat[i] = float('nan') if pred is None else asarray(pred).item()
        return yHat

    def lwlrTestPlot(xArr,yArr,k=1.0):
        """Run lwlr over a column-sorted copy of ``xArr`` (handy for plotting).

        Args:
            xArr: 2-D array-like of training samples, one per row.
            yArr: Array-like of training targets.
            k: Kernel width passed through to lwlr.

        Returns:
            A tuple ``(yHat, xCopy)``: the predictions for the sorted points
            (NaN where lwlr returned None) and the sorted matrix itself.
        """
        yHat = zeros(shape(yArr))
        # Copy explicitly: asmatrix returns a view for ndarray/matrix input,
        # and sorting a view in place would reorder the caller's xArr and
        # break its alignment with yArr.
        xCopy = asmatrix(xArr).copy()
        # NOTE(review): sort(0) sorts every column independently, so rows stay
        # intact only when at most one column is non-constant -- confirm
        # before using with several varying features.
        xCopy.sort(0)
        for i in range(shape(xArr)[0]):
            pred = lwlr(xCopy[i],xArr,yArr,k)
            # Store the scalar, not the size-1 matrix (deprecated assignment).
            yHat[i] = float('nan') if pred is None else asarray(pred).item()
        return yHat,xCopy

    def rssError(yArr,yHatArr):
        """Return the residual sum of squares between targets and predictions.

        Both arguments need to be arrays: the difference is squared with
        ``**`` and then reduced with ``.sum()``.
        """
        residuals = yArr - yHatArr
        squared = residuals ** 2
        return squared.sum()


        

    $4 岭回归:

    解决过拟合问题,常见的做法是正则化,即添加额外的惩罚项。

    $4.1 岭回归代码

    #岭回归方法

    def ridgeRegres(xMat,yMat,lam=0.2):
        """Solve ridge regression: ``ws = (X^T X + lam * I)^-1 X^T y``.

        Args:
            xMat: Sample matrix, one row per sample.
            yMat: Column matrix of targets.
            lam: Penalty added to the diagonal of ``X^T X``.

        Returns:
            Column matrix of weights, or None (after printing a message) when
            the determinant of the penalised matrix is exactly zero.
        """
        nFeatures = shape(xMat)[1]
        penalized = xMat.T*xMat + eye(nFeatures)*lam
        if linalg.det(penalized) != 0.0:
            return penalized.I * (xMat.T*yMat)
        print("This matrix is singular, cannot do inverse")
        return None

    #岭回归测试

    def ridgeTest(xArr,yArr,numTestPts=30):
        """Compute ridge weights over a range of penalties.

        The targets are mean-centred and the features are standardised with
        ``regularize``; ridgeRegres is then run with ``lam = exp(i - 10)`` for
        ``i = 0 .. numTestPts - 1``.

        Args:
            xArr: 2-D array-like of samples, one per row.
            yArr: 1-D array-like of target values.
            numTestPts: Number of penalty values to try (default 30).

        Returns:
            Array of shape ``(numTestPts, n_features)``; row ``i`` holds the
            weights obtained with ``lam = exp(i - 10)``.
        """
        # asmatrix is the function the removed numpy `mat` alias pointed to.
        xMat = asmatrix(xArr); yMat = asmatrix(yArr).T
        # Removing the target mean makes a constant X0 term unnecessary.
        yMat = yMat - mean(yMat,0)
        # Same standardisation as before, via the shared helper:
        # subtract the column mean, divide by the column variance.
        xMat = regularize(xMat)
        wMat = zeros((numTestPts,shape(xMat)[1]))
        for i in range(numTestPts):
            ws = ridgeRegres(xMat,yMat,exp(i-10))
            wMat[i,:] = ws.T
        return wMat

    def regularize(xMat):
        """Standardise the columns of ``xMat`` without modifying the input.

        Each column has its mean subtracted and is then divided by its
        variance (the variance itself, not the standard deviation).

        Returns:
            The standardised copy.
        """
        data = xMat.copy()
        colMeans = mean(data,0)
        colVars = var(data,0)
        return (data - colMeans)/colVars

    def stageWise(xArr,yArr,eps=0.01,numIt=100):
        """Run forward stagewise linear regression.

        Starting from all-zero weights, every iteration tries moving each
        single weight by ``+eps`` and ``-eps`` and keeps the one change that
        gives the lowest residual sum of squares.

        Args:
            xArr: 2-D array-like of samples, one per row.
            yArr: 1-D array-like of target values.
            eps: Step size applied to one weight per iteration.
            numIt: Number of iterations.

        Returns:
            Array of shape ``(numIt, n_features)``; row ``i`` holds the
            weights after iteration ``i``.
        """
        # asmatrix is the function the removed numpy `mat` alias pointed to.
        xMat = asmatrix(xArr); yMat = asmatrix(yArr).T
        # Centre the targets; the features are standardised by regularize.
        yMat = yMat - mean(yMat,0)
        xMat = regularize(xMat)
        n = shape(xMat)[1]
        returnMat = zeros((numIt,n))
        ws = zeros((n,1)); wsMax = ws.copy()
        for i in range(numIt):
            print(ws.T)
            lowestError = inf
            for j in range(n):
                for sign in [-1,1]:
                    wsTest = ws.copy()
                    wsTest[j] += eps*sign
                    yTest = xMat*wsTest
                    rssE = rssError(yMat.A,yTest.A)
                    if rssE < lowestError:
                        lowestError = rssE
                        wsMax = wsTest
            ws = wsMax.copy()
            returnMat[i,:] = ws.T
        return returnMat

    相关文章

      网友评论

        本文标题:08 线性回归

        本文链接:https://www.haomeiwen.com/subject/zbqedqtx.html