
作者: withism | 来源:发表于2017-12-10 23:35 被阅读39次

import numpy as np

class KNearestNeighbor(object):
    """A kNN classifier with L2 distance."""

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training
          data consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (N,) containing the training labels,
          where y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data
          consisting of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted
          labels.
        - num_loops: Determines which implementation to use to compute
          distances between training points and testing points (0, 1 or 2).

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels
          for the test data, where y[i] is the predicted label for the test
          point X[i].

        Raises:
        - ValueError: if num_loops is not 0, 1 or 2.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a nested loop over both the training
        data and the test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where
          dists[i, j] is the Euclidean distance between the ith test point
          and the jth training point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                # L2 distance between one test row and one training row;
                # the sum runs over the feature dimension.
                diff = X[i] - self.X_train[j]
                dists[i, j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts the ith test row from every training
            # row; axis=1 then sums over the feature dimension.
            diff = self.X_train - X[i]
            dists[i] = np.sqrt(np.sum(np.square(diff), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2, i.e.
        one matrix multiplication and two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        cross = np.dot(X, self.X_train.T)                   # (num_test, num_train)
        train_sq = np.sum(np.square(self.X_train), axis=1)  # (num_train,)
        # Column vector so that it broadcasts across the training axis.
        test_sq = np.sum(np.square(X), axis=1)[:, np.newaxis]

        sq_dists = -2 * cross + train_sq + test_sq
        # Floating-point cancellation can leave tiny negative values for
        # (near-)identical points; clamp them so sqrt never yields NaN.
        sq_dists = np.maximum(sq_dists, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where
          dists[i, j] gives the distance between the ith test point and the
          jth training point.
        - k: The number of nearest neighbors that vote for each label.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels
          for the test data, where y[i] is the predicted label for the test
          point X[i].
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Labels of the k training points closest to the ith test point.
            nearest = np.argsort(dists[i])[:k]
            closest_y = self.y_train[nearest]

            # Majority vote. np.bincount needs non-negative integer labels;
            # np.argmax returns the first maximum, so ties are broken in
            # favour of the smaller label.
            y_pred[i] = np.argmax(np.bincount(closest_y))
        return y_pred


  • Spark --基于DataFrame API实现KNN算法

    Spark -- 基于DataFrame API实现KNN算法 KNN简介 KNN(k-Nearest Neigh...

  • KNN近邻算法总结

    目录 一、KNN近邻算法思想 二、KNN模型三大要素 三、KNN算法实现步骤 四、KNN算法的KD树实现 五、总结...

  • 利用Python进行数字识别

    思路 通过Python实现KNN算法。而KNN算法就是K最近邻(k-Nearest Neighbor,KNN)分类...

  • KNN

    KNN学习笔记 KNN is a classification algorithm which is instan...

  • 第六节分类算法

    1knn算法 1.1knn的过程 1.2scikit-learn中的knn 1.3scikit-learn机器学习...

  • knn算法

    knn算法 knn算法简介 邻近算法,或者说K最近邻(kNN,k-NearestNeighbor)分类算法。所谓K...

  • 机器学习笔记汇总


  • 01 KNN算法 - 概述

    KNN算法全称是K近邻算法 (K-nearest neighbors,KNN) KNN是一种基本的机器学习算法,所谓...

  • KNN算法-1-KNN简介

    KNN入门 1、KNN简介 kNN(k-NearestNeighbor),也就是k最近邻算法,这是一种有监督的学习...

  • KNN算法以及欧式距离

    1.KNN算法介绍 KNN 是什么? KNN(K-Nearest Neighbor)是最简单的机器学习算法之一,可...


