美文网首页
自己造轮子-AdaBoost-DS

自己造轮子-AdaBoost-DS

作者: Alistair | 来源:发表于2017-05-24 10:56 被阅读0次

    自己造轮子系列今天造的是AdaBoost,基分类器用的是DS(decision stump,决策树桩)。之所以会写这个系列,主要是我觉得一方面可以锻炼coding能力,另一方面也有助于对算法的理解,毕竟懂得自己推导、理解含义,再到动手实现,感觉是不一样的。

    from numpy import *
    
    #decision stump Classifier
    def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
        """Classify every sample with a single-feature threshold test.

        Args:
            dataMatrix: 2-D numpy array or matrix, one sample per row.
            dimen: index of the feature column to compare.
            threshVal: threshold the feature is compared against.
            threshIneq: 'lt' marks samples whose feature is <= threshVal as
                -1.0; any other value marks samples whose feature is
                > threshVal as -1.0.

        Returns:
            An (m, 1) float ndarray holding 1.0 or -1.0 for each sample.
        """
        featureColumn = dataMatrix[:, dimen]
        if threshIneq == 'lt':
            negativeMask = featureColumn <= threshVal
        else:
            negativeMask = featureColumn > threshVal
        sampleCount = shape(dataMatrix)[0]
        # Start from all +1.0 and flip only the samples selected by the mask.
        predictions = ones((sampleCount, 1))
        predictions[negativeMask] = -1.0
        return predictions
    
    def buildStump(dataArr, classLabels, D):
        """Find the decision stump with the lowest weighted error.

        Three nested loops scan every feature, every threshold step across
        that feature's value range, and both inequality directions.

        Args:
            dataArr: 2-D array-like of samples, one row per sample.
            classLabels: one label per sample (flat, row or column shaped);
                labels are compared against the stump's 1.0 / -1.0 output.
            D: one weight per sample (flat, row or column shaped), supplied
                by the AdaBoost training loop.

        Returns:
            A tuple (bestStump, minError, bestClasEst). bestStump is a dict
            with keys 'dim', 'thresh' and 'ineq'; minError is the 1x1 matrix
            holding that stump's weighted error; bestClasEst is the (m, 1)
            array of its predictions. With zero feature columns the result is
            ({}, inf, an all-zero (m, 1) matrix).

        Raises:
            ValueError: if classLabels or D does not hold exactly one entry
                per sample (raised by reshape).
        """
        # asmatrix instead of mat: NumPy 2.0 removed mat from its namespace.
        dataMatrix = asmatrix(dataArr)
        m, n = shape(dataMatrix)
        # reshape (not .T) so flat, row and column inputs all become (m, 1).
        labelMat = asmatrix(classLabels).reshape(m, 1)
        weights = asmatrix(D).reshape(m, 1)
        numSteps = 10.0
        bestStump = {}
        bestClasEst = asmatrix(zeros((m, 1)))
        minError = inf
        for i in range(n):
            column = dataMatrix[:, i]
            rangeMin = column.min()
            stepSize = (column.max() - rangeMin) / numSteps
            # Starting at -1 places the first threshold one step below the
            # column minimum.
            for j in range(-1, int(numSteps) + 1):
                threshVal = rangeMin + float(j) * stepSize
                for inequal in ('lt', 'gt'):
                    predictedVal = stumpClassify(dataMatrix, i, threshVal, inequal)
                    # 1.0 where the stump disagrees with the label, else 0.0.
                    errArr = asmatrix(predictedVal != labelMat).astype(float)
                    weightedError = weights.T * errArr
                    if weightedError < minError:
                        minError = weightedError
                        bestClasEst = predictedVal.copy()
                        bestStump['dim'] = i
                        bestStump['thresh'] = threshVal
                        bestStump['ineq'] = inequal
        return bestStump, minError, bestClasEst
    
    #adaboost的本体,50个基分类器,如果ein已经为0则break
    def adaBoostTrainDS(dataArr, classLabels, numIt = 50):
        """Train an AdaBoost ensemble whose base classifiers are decision stumps.

        Args:
            dataArr: 2-D array-like of samples, one row per sample.
            classLabels: one label per sample (flat, row or column shaped).
            numIt: maximum number of boosting rounds (base classifiers).

        Returns:
            A list with one dict per trained stump: the dict returned by
            buildStump plus an 'alpha' entry holding that stump's vote weight.
            Training stops early once the ensemble's training error is 0.
        """
        m = shape(dataArr)[0]
        # Flatten the labels once; the flat form is passed on to buildStump and
        # the column form is used for the weight update below.
        labels = asarray(classLabels).ravel()
        labelCol = asmatrix(labels).T
        # asmatrix instead of mat: NumPy 2.0 removed mat from its namespace.
        D = asmatrix(ones((m, 1)) / m)
        aggClassEst = asmatrix(zeros((m, 1)))
        weakClassArr = []
        for _ in range(numIt):
            bestStump, error, classEst = buildStump(dataArr, labels, D)
            # Pull out a plain float: float() on a 1x1 matrix is deprecated, and
            # max() may be numpy's (axis-taking) max under a star import.
            err = float(asarray(error).item())
            denominator = 1e-16 if err < 1e-16 else err  # avoid dividing by zero
            alpha = float(0.5 * log((1.0 - err) / denominator))
            bestStump['alpha'] = alpha
            weakClassArr.append(bestStump)
            # Scale each weight by exp(-alpha * label * prediction), then
            # renormalise so the weights sum to 1.
            expon = multiply(-1 * alpha * labelCol, classEst)
            D = multiply(D, exp(expon))
            D = D / D.sum()
            aggClassEst += alpha * classEst
            # Fraction of training samples the current ensemble gets wrong.
            errorRate = (sign(aggClassEst) != labelCol).mean()
            if errorRate == 0.0:
                break
        return weakClassArr
    
    #构建的分类器,sign(Σα*基分类器)
    def adaClassify(datToClass, classifierArr):
        """Classify samples with a trained ensemble: sign(sum(alpha * stump)).

        Args:
            datToClass: 2-D array-like of samples, one row per sample (a flat
                sequence is treated as a single sample).
            classifierArr: iterable of stump dicts with keys 'dim', 'thresh',
                'ineq' and 'alpha'.

        Returns:
            An (m, 1) matrix with the sign of the alpha-weighted vote for each
            sample (0 where the vote is exactly tied).
        """
        # asmatrix instead of mat: NumPy 2.0 removed mat from its namespace.
        dataMatrix = asmatrix(datToClass)
        m = shape(dataMatrix)[0]
        aggClassEst = asmatrix(zeros((m, 1)))
        for stump in classifierArr:
            classEst = stumpClassify(dataMatrix, stump['dim'],
                                     stump['thresh'], stump['ineq'])
            aggClassEst += stump['alpha'] * classEst
        return sign(aggClassEst)
    

    相关文章

      网友评论

          本文标题:自己造轮子-AdaBoost-DS

          本文链接:https://www.haomeiwen.com/subject/hfinxxtx.html