Cross-Validation
Cross-validation is mainly about accuracy estimation: it gives a more reliable estimate of a model's predictive accuracy than a single train/test split.
import numpy as np
from sklearn import datasets
# cross_validation was renamed to model_selection in newer versions of scikit-learn
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
#load the iris dataset used as the training data
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 4)
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(X_train,y_train)
scores = cross_val_score(knn,X,y,cv = 5,scoring = 'accuracy')
#scores is an array with one accuracy value per fold
#cv = 5 splits the whole dataset into 5 folds and computes the accuracy 5 times
#each accuracy is therefore measured on a different test_data / train_data split
print(scores.mean())
#take the mean of the 5 fold accuracies
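To see what cross-validation adds over a single split, here is a minimal follow-up sketch (using the knn, X_test and y_test already defined above): compare the single-split accuracy with the 5-fold mean. The single number shifts with random_state, while scores.mean() averages over 5 different splits and is more stable.
single_split = knn.score(X_test,y_test)
#accuracy on the one held-out test set; compare it with scores.mean()
print(single_split)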
k_range = range(1,31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors = k)
    #'neg_mean_squared_error' returns a negative MSE, so negate it to get a positive loss
    loss = -cross_val_score(knn,X,y,cv = 10,scoring = 'neg_mean_squared_error')
    #scores = cross_val_score(knn,X,y,cv = 10,scoring = 'accuracy')
    k_scores.append(loss.mean())
    #k_scores.append(scores.mean())
plt.plot(k_range,k_scores)
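#optional, assumed addition (not in the original code): label the axes so the plot is easier to read
plt.xlabel('Value of k for KNN')
plt.ylabel('Cross-validated mean squared error')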
plt.show()
By comparing the cross-validated scores for different values of k in knn = KNeighborsClassifier(n_neighbors = k), you can pick out the best k value (the one with the highest accuracy, or equivalently the lowest loss) to use as the model's n_neighbors parameter, as in the sketch below.
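Reading the best k off the plot works, but it can also be picked programmatically. A minimal sketch, assuming k_scores holds the mean squared error for each k (the name best_k is just illustrative): the best k minimizes the loss; with the commented-out scoring = 'accuracy' variant the rule flips and np.argmax is used instead.
best_k = k_range[int(np.argmin(k_scores))]
#k with the smallest cross-validated mean squared error
print(best_k)
#with scoring = 'accuracy', use np.argmax(k_scores) to take the k with the highest mean accuracy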