def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` with a k-nearest-neighbours majority vote.

    The Euclidean distance from ``inX`` to every row of ``dataSet`` is
    computed, the ``k`` closest rows are selected, and the label that
    occurs most often among them is returned.

    Args:
        inX: The sample to classify; one value per column of ``dataSet``.
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: Sequence of labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of rows are clamped to the number of rows.

    Returns:
        The label with the most votes among the nearest neighbours. When
        several labels tie, the first one produced by the stable sort of
        the vote table wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    # Function-scope imports keep this snippet runnable on its own,
    # whatever the surrounding file imports.
    import operator

    import numpy as np

    if k < 1:
        raise ValueError("k must be a positive integer")

    dataSet = np.asarray(dataSet)  # also accept plain nested lists
    dataSetSize = dataSet.shape[0]  #1 number of rows (training samples)
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")

    # Repeat inX once per row so it can be subtracted element-wise.
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet  #2
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)  #3 sum of squares along each row
    distances = sqDistances ** 0.5
    # Row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()  #4

    classCount = {}
    # Never look at more neighbours than there are samples.
    for i in range(min(k, dataSetSize)):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )  #5 sort (label, votes) pairs by votes, highest first
    return sortedClassCount[0][0]
1 dataSetSize = dataSet.shape[0] #返回矩阵的行数（即样本个数）
>>> dataSet=array([[1,2],[2,3],[3,4]])
>>> dataSet.shape
(3L, 2L)
>>> dataSet.shape[0]
3L
2 tile(inX, (dataSetSize,1))
>>>inX
[5,6]
>>> tile(inX,(dataSet.shape[0],1))
array([[5, 6],
[5, 6],
[5, 6]])
>>> tile(inX,(dataSet.shape[0],3))
array([[5, 6, 5, 6, 5, 6],
[5, 6, 5, 6, 5, 6],
[5, 6, 5, 6, 5, 6]])
3 sqDiffMat.sum(axis=1)
>>> sqDiffMat
array([[0.25, 0.36],
[0.25, 0.25],
[0.25, 0.25],
[0.25, 0.16]])
>>> sqDistances=sqDiffMat.sum(axis=1)
>>> sqDistances
array([0.61, 0.5 , 0.5 , 0.41])
4 distances.argsort()
>>> dataSet=array([2,6,8,4,3])
>>> dataSet.argsort()
array([0, 4, 3, 1, 2], dtype=int64)
>>> dataSet[dataSet.argsort()]
array([2, 3, 4, 6, 8])
5 sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True) #按票数（每个元组的第 2 项）从大到小排序；Python 3 中字典没有 iteritems()，需改为 classCount.items()
https://blog.csdn.net/vivian_ll/article/details/78647979
网友评论