# coding:utf-8
from collections import defaultdict
import numpy as np
from numpy import *
class knn:
    """Minimal k-nearest-neighbours classifier with min-max scaling helpers.

    The class is stateless: every method receives the data it works on as
    arguments. Typical use is to scale the training samples with
    :meth:`norm_data_set`, scale the query with :meth:`norm_vec` (passing the
    *unscaled* training samples), and then call :meth:`classify`.
    """

    def __init__(self):
        pass

    def createDataSet(self):
        """Return the built-in toy training set.

        :return: tuple ``(group, labels)``; ``group`` is a 4x2 float array of
            sample points and ``labels`` holds the class label of each row.
        """
        group = np.array([[1.0, 2.0], [1.2, 0.1], [0.1, 1.4], [0.3, 3.5]])
        labels = ['A', 'A', 'B', 'B']
        return group, labels

    def classify(self, input, data_set, labels, k):
        """Classify ``input`` by majority vote among its nearest samples.

        :param input: query vector (one value per feature). The name shadows
            the builtin but is kept so keyword callers continue to work.
        :param data_set: 2-D array-like of samples, one row per sample.
        :param labels: sequence of labels; ``labels[i]`` belongs to row ``i``.
        :param k: number of neighbours that vote. Values larger than the
            number of samples are clamped to it.
        :return: the winning label, or ``None`` when nothing votes
            (``k <= 0`` or an empty ``data_set``). On a tie the label that
            reached the top count first, walking from the nearest neighbour
            outwards, wins.
        """
        samples = np.asarray(data_set, dtype=float)
        if samples.shape[0] == 0:
            return None
        query = np.asarray(input, dtype=float)

        # Broadcasting subtracts every sample row from the query, so the
        # query does not have to be tiled into a matrix first.
        distances = np.sqrt(np.sum((query - samples) ** 2, axis=1))
        nearest_first = np.argsort(distances)

        votes = defaultdict(int)
        best_count = 0
        best_label = None
        # Slicing clamps k to the sample count instead of raising IndexError;
        # max(k, 0) keeps a negative k meaning "no votes".
        for index in nearest_first[:max(k, 0)]:
            label = labels[int(index)]
            votes[label] += 1
            # Strict '>' means a later label must beat, not merely equal,
            # the current leader.
            if votes[label] > best_count:
                best_count = votes[label]
                best_label = label
        return best_label

    def _min_and_range(self, data_set):
        """Return per-column minimum and a division-safe per-column range."""
        col_min = np.min(data_set, axis=0)
        col_range = np.max(data_set, axis=0) - col_min
        # A constant column has range 0; dividing by 1 instead maps all of
        # its values to 0 rather than producing NaN/inf.
        col_range = np.where(col_range == 0, 1.0, col_range)
        return col_min, col_range

    def norm_data_set(self, data_set):
        """Min-max scale every column of ``data_set`` to the range [0, 1].

        :param data_set: 2-D array-like of samples, one row per sample.
        :return: float array of the same shape with each column mapped to
            ``(x - min) / (max - min)``; constant columns become 0.
        """
        samples = np.asarray(data_set, dtype=float)
        col_min, col_range = self._min_and_range(samples)
        return (samples - col_min) / col_range

    def norm_vec(self, vec, data_set):
        """Scale a query vector with the statistics of ``data_set``.

        The column minimum and range are taken from ``data_set`` alone, the
        same values :meth:`norm_data_set` uses, so the scaled query and the
        scaled samples share one coordinate system. Components of ``vec``
        outside the range of the samples therefore map outside [0, 1].

        :param vec: query vector (one value per feature).
        :param data_set: the *unscaled* 2-D array-like of training samples.
        :return: float array holding the scaled query vector.
        """
        samples = np.asarray(data_set, dtype=float)
        col_min, col_range = self._min_and_range(samples)
        return (np.asarray(vec, dtype=float) - col_min) / col_range
if __name__ == '__main__':
    # Demo run: scale the toy data set and one query point, then classify
    # the query using its 4 nearest neighbours. The predicted label is only
    # stored in `predicted`; nothing is printed.
    classifier = knn()
    samples, sample_labels = classifier.createDataSet()
    query = [0.3, 3.2]
    scaled_samples = classifier.norm_data_set(samples)
    scaled_query = classifier.norm_vec(query, samples)
    predicted = classifier.classify(
        scaled_query, scaled_samples, sample_labels, 4
    )
# [Page residue: "Reader comments" heading from the source web page; not part of the program.]