美文网首页
knn分类算法底层实现(python)

knn分类算法底层实现(python)

作者: 吃番茄的土拨鼠 | 来源:发表于2018-05-17 16:33 被阅读0次
    # coding:utf-8
    from collections import defaultdict
    
    import numpy as np
    from numpy import *
    
    
    class knn:
        """Minimal k-nearest-neighbours classifier with min-max feature scaling.

        The class keeps no state: every method works only on its arguments.
        """

        def __init__(self):
            pass

        def createDataSet(self):
            """Return a small hard-coded training set.

            :return: tuple ``(group, labels)``; ``group`` is a 4x2 float array of
                samples and ``labels`` holds one class label per row of ``group``.
            """
            group = np.array([[1.0, 2.0], [1.2, 0.1], [0.1, 1.4], [0.3, 3.5]])
            labels = ['A', 'A', 'B', 'B']
            return group, labels

        def classify(self, input, data_set, labels, k):
            """Classify ``input`` by majority vote among its nearest samples.

            :param input: feature vector to classify.
            :param data_set: 2-D array-like of samples, one sample per row.
            :param labels: sequence with one label per row of ``data_set``.
            :param k: number of nearest neighbours that vote; values larger than
                the number of samples are clamped to the number of samples.
            :return: the label that first reaches the highest vote count while
                neighbours are visited from nearest to farthest, or ``None``
                when no neighbour votes (``k <= 0``).
            """
            samples = np.asarray(data_set, dtype=float)
            query = np.asarray(input, dtype=float)
            # Broadcasting subtracts every sample row from the query vector.
            squared_diff = (query - samples) ** 2
            # Euclidean distance from the query to each sample.
            distances = np.sqrt(np.sum(squared_diff, axis=1))
            # Sample indices ordered from nearest to farthest.
            nearest_first = np.argsort(distances)
            # Never ask for more neighbours than there are samples.
            neighbour_count = min(k, len(nearest_first))
            votes = defaultdict(int)
            best_count = 0
            best_label = None
            for rank in range(neighbour_count):
                label = labels[nearest_first[rank]]
                votes[label] += 1
                # Strict '>' keeps the earlier (closer) leader on a tie.
                if votes[label] > best_count:
                    best_count = votes[label]
                    best_label = label
            return best_label

        def _min_and_range(self, data_set):
            """Return the per-column minimum and a division-safe per-column range."""
            samples = np.asarray(data_set, dtype=float)
            val_min = np.min(samples, 0)
            val_range = np.max(samples, 0) - val_min
            # A constant column has range 0; dividing by 1 instead maps it to 0
            # rather than producing NaN/inf.
            val_range = np.where(val_range == 0, 1.0, val_range)
            return val_min, val_range

        def norm_data_set(self, data_set):
            """Min-max scale every column of the data set.

            :param data_set: 2-D array-like of samples, one sample per row.
            :return: float array of the same shape where each column is
                ``(value - column_min) / column_range``; a constant column
                becomes all zeros.
            """
            val_min, val_range = self._min_and_range(data_set)
            return (np.asarray(data_set, dtype=float) - val_min) / val_range

        def norm_vec(self, vec, data_set):
            """Scale the vector to be classified like the training samples.

            The minimum and range are taken from ``data_set`` only, so the vector
            lands in the same coordinate system as the output of
            ``norm_data_set``. Components outside the training range therefore
            fall outside ``[0, 1]``.

            :param vec: feature vector to scale.
            :param data_set: 2-D array-like of training samples (unscaled).
            :return: float array ``(vec - column_min) / column_range``.
            """
            val_min, val_range = self._min_and_range(data_set)
            return (np.asarray(vec, dtype=float) - val_min) / val_range
    
    
    if __name__ == '__main__':
        # Demo run: classify one query point against the built-in sample data.
        classifier = knn()
        samples, sample_labels = classifier.createDataSet()
        query = [0.3, 3.2]
        # Scale the training samples and the query before measuring distances.
        scaled_samples = classifier.norm_data_set(samples)
        scaled_query = classifier.norm_vec(query, samples)
        # Vote among the 4 nearest neighbours; the result is kept but not printed.
        predicted = classifier.classify(scaled_query, scaled_samples, sample_labels, 4)
    
    
    

    相关文章

      网友评论

          本文标题:knn分类算法底层实现(python)

          本文链接:https://www.haomeiwen.com/subject/fmzbdftx.html