from numpy import *
import operator
def createDataSet():
    """Build the four-point toy training set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array with
        one 2-D point per row and ``labels`` is a list holding the class
        label (``'A'`` or ``'B'``) of the row at the same index.
    """
    group = array([
        [1.0, 1.1],
        [1.0, 1.0],
        [0.0, 0.0],
        [0.0, 0.1],
    ])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(inX, dataSet, labels, k, verbose=True):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Query point; a sequence or array with one value per column of
            ``dataSet``.
        dataSet: 2-D NumPy array of training points, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` is the label of row
            ``i`` of ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= dataSet.shape[0]``.
        verbose: When true (the default, which keeps the historical output),
            print the distance vector and the neighbour ordering.

    Returns:
        The label with the most votes. On a tie, the tied label whose first
        vote came from the closest neighbour wins.

    Raises:
        IndexError: If ``k`` is outside ``1..dataSet.shape[0]``.
    """
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        # Same exception type the unchecked indexing used to raise for a bad
        # k, but with a message that names the actual problem.
        raise IndexError(
            "k must be between 1 and %d (number of samples), got %r"
            % (dataSetSize, k)
        )

    # Broadcasting subtracts every row of dataSet from inX; no explicit
    # tiling of the query point is needed.
    diffMat = asarray(inX) - dataSet
    # axis=1 sums across each row, giving one squared distance per sample.
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Ascending order; the result holds row indices, nearest first. A stable
    # sort keeps equidistant samples in their original row order.
    sortedDistIndicies = distances.argsort(kind="stable")

    if verbose:
        print(distances)
        print(sortedDistIndicies)

    # Tally one vote per neighbour; dict insertion order records which label
    # was seen first (i.e. closest).
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Descending by vote count. sorted() is used instead of max() because the
    # module's star import from numpy may shadow builtins such as max.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Demo run: build the toy data set, then classify the point (0, 0) using its
# three nearest neighbours.
dataset, labels = createDataSet()
result = classify0([0, 0], dataset, labels, 3)
# Reference notes on the functions used above: http://www.cnblogs.com/100thMountain/p/4719503.html
# Reader comments