美文网首页
ML学习笔记_KNN手写字体识别

ML学习笔记_KNN手写字体识别

作者: 阿兰先生 | 来源:发表于2020-01-19 18:45 被阅读0次

KNN 算法的实际应用练习:手写数字识别。我们先将手写数字图片二值化为 32×32 的 0/1 文本格式,并统一输入图片的尺寸,方便后续使用 KNN 算法进行识别。

#kNN 手写字体模式识别
import time

from numpy import *
import os

def kNNclassify(inX, dataSet, lables, k):
    '''
    Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    :param inX: vector to classify
    :param dataSet: training samples, one sample per row
    :param lables: labels; ``lables[i]`` belongs to row ``i`` of ``dataSet``
    :param k: number of nearest neighbours that take part in the vote
    :return: the label with the most votes among the ``k`` nearest rows;
             with insertion-ordered dicts (Python 3.7+) a tie goes to the
             label met first when walking from nearest to farthest
    '''
    # Euclidean distance from inX to every training row.
    rowCount = dataSet.shape[0]
    offsets = tile(inX, (rowCount, 1)) - dataSet
    dist = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = dist.argsort()  # row indices, closest first

    # Tally the labels of the k closest rows.
    votes = {}
    for rank in range(k):
        label = lables[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Stable sort on the negated count: highest count first, ties keep
    # their insertion order.
    ranked = sorted(votes.items(), key=lambda pair: -pair[1])
    winner, _ = ranked[0]
    return winner


def img2vector(filename):
    '''
    Load a 32x32 text-encoded binary image as a 1x1024 row vector.

    The first 32 lines of the file are read and the first 32 characters of
    each line are converted with ``int``; the character at row ``i``,
    column ``j`` is stored at index ``32 * i + j``.

    :param filename: path of the text file to read
    :return: array of shape (1, 1024) holding the parsed digits
    '''
    returnVect = zeros((1, 1024))
    # Use a context manager so the handle is closed even if parsing raises;
    # leaving it open leaks one descriptor per image.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect

def _labelFromFileName(fileName):
    '''Return the integer that precedes the first '_' or '.' in ``fileName``.'''
    return int(fileName.replace('_', '.').split('.')[0])


def handwritingClassTest(trainPath,testPath):
    '''
    Run the kNN (k=3) handwriting classifier over a test directory and
    print every misclassified file, the error count and the error rate.

    Only entries whose name ends with ``.txt`` are used; the expected digit
    is parsed from the part of the file name before the first '_' or '.'.

    :param trainPath: directory holding the training images
    :param testPath: directory holding the test images
    :return: None; results are printed
    '''
    # Build the training matrix. Filter the listing first so that row i of
    # the matrix and hwLables[i] always describe the same file, even when
    # the directory contains unrelated entries.
    trainFiles = [name for name in os.listdir(trainPath) if name.endswith('.txt')]
    hwLables = [_labelFromFileName(name) for name in trainFiles]
    trainningMat = zeros((len(trainFiles), 1024))
    for row, fileNameStr in enumerate(trainFiles):
        trainningMat[row, :] = img2vector(os.path.join(trainPath, fileNameStr))

    # Classify every test image and count the misses.
    testFiles = [name for name in os.listdir(testPath) if name.endswith('.txt')]
    errorCount = 0
    for testPic in testFiles:
        testNum = _labelFromFileName(testPic)
        # Test images are read from testPath, not trainPath.
        tesVector = img2vector(os.path.join(testPath, testPic))
        knnTsetResult = kNNclassify(tesVector, trainningMat, hwLables, 3)
        if knnTsetResult != testNum:
            errorCount += 1
            print("the file is %s , the classifier came back with: %d, the real answer is: %d" % (testPic, knnTsetResult, testNum))

    # Count only the files that were actually classified.
    totalTestTime = len(testFiles)
    print("\nthe total test time is %d, number of errors is: %d" % (totalTestTime, errorCount))
    er = 0 if totalTestTime == 0 else format(float(errorCount) / totalTestTime, '.3%')
    print("\nthe total error rate is: %s" %er)

def knn_debug():
    '''Run kNNclassify (k=3) on a four-point toy data set and print the predicted label.'''
    samples = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    tags = ['a', 'a', 'b', 'b']
    print(kNNclassify([0, 0.2], samples, tags, 3))

if __name__ == '__main__':
    # Absolute, machine-specific paths of the training and test digit folders.
    trainDir = "/Users/moxi.hyy/Downloads/machinelearninginaction/Ch02/digits/trainingDigits"
    testDir = "/Users/moxi.hyy/Downloads/machinelearninginaction/Ch02/digits/testDigits"
    handwritingClassTest(trainDir, testDir)

相关文章

网友评论

      本文标题:ML学习笔记_KNN手写字体识别

      本文链接:https://www.haomeiwen.com/subject/ootdzctx.html