美文网首页
K-近邻法

K-近邻法

作者: strealingFire | 来源:发表于2018-04-21 21:07 被阅读0次

    简单地说,k近邻算法采用测量不同特征值之间的距离方法进行分类。

    from numpy import *

    import operator

    def classify0(inX,dataSet,labels,k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        Distances are Euclidean, computed between ``inX`` and every row of
        ``dataSet``.

        Args:
            inX: the feature vector to classify (one value per column of
                ``dataSet``).
            dataSet: 2-D NumPy array of training samples, one sample per row.
            labels: sequence of class labels; ``labels[i]`` belongs to row ``i``
                of ``dataSet``.
            k: number of nearest neighbours that take part in the vote. Values
                larger than the number of rows are clamped to the row count.

        Returns:
            The label with the most votes. Ties go to the label that was met
            first when walking the neighbours from nearest to farthest.

        Raises:
            ValueError: if ``k`` is smaller than 1 or ``dataSet`` has no rows.
        """
        dataSetSize = dataSet.shape[0]

        # Never ask for more neighbours than there are training rows.
        neighbourCount = min(k, dataSetSize)
        if neighbourCount < 1:
            raise ValueError("classify0 needs k >= 1 and a non-empty dataSet")

        # Euclidean distance from inX to every training row.
        diffMat = tile(inX,(dataSetSize,1))-dataSet
        sqDistances = (diffMat**2).sum(axis=1)
        distances = sqDistances**0.5

        # Row indices ordered from nearest to farthest.
        sortedDistIndicies = distances.argsort()

        classCount = {}
        for i in range(neighbourCount):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1

        # The vote is decided only after all neighbours have been counted;
        # returning inside the loop would reduce the algorithm to k = 1.
        # sorted() is stable, so equal counts keep nearest-first order.
        sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]

    # Example call: classify the point [1,1] using the 3 nearest neighbours.
    # NOTE(review): `group` and `labels` are not defined anywhere in the visible
    # listing - presumably a small sample data set created elsewhere; confirm
    # before running this line.
    classify0([1,1],group,labels,3)


    def file2matrix(filename):
        """Parse a tab-separated data file into a feature matrix and a label list.

        Each non-blank line supplies three numeric features in its first three
        tab-separated fields and an integer class label in its last field.

        Args:
            filename: path of the text file to read.

        Returns:
            A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is an
            ``(n, 3)`` NumPy float array and ``classLabelVector`` is a list of
            ``n`` ints, ``n`` being the number of non-blank lines.

        Raises:
            ValueError: if a feature is not a number or a label is not an int.
        """
        # The context manager closes the handle even when parsing fails.
        with open(filename) as fr:
            # Blank lines (e.g. a trailing newline) carry no record; skip them
            # instead of failing on int('').
            records = [line.strip().split('\t') for line in fr if line.strip()]

        # One row per record, three feature columns.
        returnMat = zeros((len(records),3))
        classLabelVector = []

        # Fill the matrix row by row and collect the labels in the same order.
        for index, listFromLine in enumerate(records):
            returnMat[index,:] = [float(value) for value in listFromLine[0:3]]
            classLabelVector.append(int(listFromLine[-1]))

        return returnMat,classLabelVector

    # Load the feature matrix and the label list from 'datingTestSet2.txt'
    # (path is relative to the current working directory).
    datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')


    def autoNorm(dataSet):
        """Min-max normalise every column of ``dataSet`` to the range [0, 1].

        Each value becomes ``(value - column_min) / (column_max - column_min)``.
        A column whose values are all equal has a zero range; its normalised
        values are set to 0 instead of NaN.

        Args:
            dataSet: 2-D NumPy array, one sample per row, one feature per column.

        Returns:
            A tuple ``(normDataSet, ranges, minVals)``: the normalised array, the
            per-column ``max - min`` (zero for constant columns), and the
            per-column minimum.
        """
        minVals = dataSet.min(0)  # axis 0: minimum of each column
        maxVals = dataSet.max(0)  # axis 0: maximum of each column
        ranges = maxVals - minVals

        # Divide by 1 where the range is 0 so constant columns yield 0, not 0/0.
        safeRanges = where(ranges == 0, 1, ranges)

        # Broadcasting applies the per-column vectors to every row, so no
        # explicit tile() copies are needed.
        normDataSet = (dataSet - minVals)/safeRanges

        return normDataSet,ranges,minVals


    # Normalise the loaded feature matrix; keep the per-column ranges and minima
    # returned alongside it.
    normMat ,ranges,minVals = autoNorm(datingDataMat)


    def datingClassTest():
        """Measure the classifier's error rate on a hold-out slice of the data.

        Loads 'datingTestSet2.txt', normalises it, uses the first 10% of the rows
        as test vectors and the remaining rows as training data, classifies each
        test vector with k = 3, and prints every prediction followed by the
        overall error rate.

        Raises:
            ZeroDivisionError: if the file has so few rows that the hold-out
                slice is empty.
        """
        hoRatio = 0.10  # fraction of rows held out for testing

        datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')
        normMat,ranges,minVals = autoNorm(datingDataMat)

        m = normMat.shape[0]
        numTestVecs = int(m*hoRatio)
        errorCount = 0.0

        for i in range(numTestVecs):
            # Rows [0, numTestVecs) are test vectors; rows [numTestVecs, m) train.
            classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)

            # Apply the format with %; passing the tuple as a second print()
            # argument would emit the raw format string and the tuple.
            print("the classifier came back with: %d,the real answer is : %d" % (classifierResult,datingLabels[i]))

            if classifierResult != datingLabels[i]:
                errorCount += 1.0

        print("the total error rate is:",(errorCount/float(numTestVecs)))


    def classifyPerson():
        """Interactively classify one person from three typed-in feature values.

        Prompts for three numbers, normalises them with the minima and ranges of
        the data in 'datingTestSet2.txt', classifies the result with k = 3 and
        prints the matching description from ``resultList``.

        Raises:
            ValueError: if an entered value cannot be parsed as a float.
        """
        resultList = ['not at all','in small doses','in large doses']

        percentTats = float(input("percentage of time spent playing video games?"))
        ffMiles = float(input("frequent flier miles earned per year ?"))
        iceCream = float(input("liters of ice cream consumed per year?"))

        datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')
        normMat,ranges,minVals = autoNorm(datingDataMat)

        # NOTE(review): the feature order here (miles, game time, ice cream) is
        # presumably the column order of the data file - confirm against it.
        inArr = array([ffMiles,percentTats,iceCream])

        # Scale the query exactly as the training data was scaled.
        classifierResult = classify0((inArr-minVals)/ranges,normMat,datingLabels,3)

        # NOTE(review): assumes the labels in the data file are 1, 2, 3, so the
        # list index is label - 1; indexing with the raw label would be shifted
        # by one and overflow on label 3. Confirm against the data file.
        print("You will probably like this person:",resultList[classifierResult - 1])

    相关文章

      网友评论

          本文标题:K-近邻法

          本文链接:https://www.haomeiwen.com/subject/lduflftx.html