记录一些常用的性能评价指标
预测模型(均方误差(MSE)/均方根误差(RMSE)以及平均绝对误差(MAE))
from math import sqrt
# Hand-made actual and predicted values used to demonstrate the metrics.
target = [1.5, 2.1, 3.3, -4.7, -2.3, 0.75]
prediction = [0.5, 1.5, 2.1, -2.2, 0.1, -0.5]

# Per-sample error: actual value minus predicted value.
error = [actual - predicted for actual, predicted in zip(target, prediction)]

# Squared errors feed the RMSE; absolute errors feed the MAE.
squaredError = [val * val for val in error]
absError = [abs(val) for val in error]

# RMSE is the square root of the mean squared error (MSE).
print("RMSE = ", sqrt(sum(squaredError) / len(squaredError)))
print("MAE = ", sum(absError) / len(absError))
下面来看一个回归预测评估的例子
import urllib.request
import numpy
from sklearn import linear_model
from math import sqrt
import matplotlib.pyplot as plt
# Read the red-wine quality data set from the web.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv")

xList = []   # attribute rows, label column removed
labels = []  # last column of every data row
names = []   # column names taken from the first non-blank line

# The context manager closes the HTTP response even if parsing fails.
with urllib.request.urlopen(target_url) as data:
    for line in data:
        text = str(line, encoding='utf-8').strip()  # response lines are bytes
        if not text:
            continue  # tolerate blank lines instead of failing in float('')
        row = text.split(";")
        if not names:
            names = row  # header line
            continue
        labels.append(float(row[-1]))  # the last column is the label
        xList.append([float(num) for num in row[:-1]])  # remaining columns are attributes

# Split into training and test sets: rows whose index is divisible by 3 form the test set.
xListTest = [row for i, row in enumerate(xList) if i % 3 == 0]
xListTrain = [row for i, row in enumerate(xList) if i % 3 != 0]
labelsTest = [label for i, label in enumerate(labels) if i % 3 == 0]
labelsTrain = [label for i, label in enumerate(labels) if i % 3 != 0]

# Convert to numpy arrays for the numeric work below.
xTrain = numpy.array(xListTrain)
yTrain = numpy.array(labelsTrain)
xTest = numpy.array(xListTest)
yTest = numpy.array(labelsTest)

# Candidate ridge penalties: 0.1**0 down to 0.1**6.
alphaList = [0.1**i for i in [0, 1, 2, 3, 4, 5, 6]]

# Fit one ridge model per alpha and record its RMSE on the test set.
rmsError = []
for alph in alphaList:
    wineRidgeModel = linear_model.Ridge(alpha=alph)
    wineRidgeModel.fit(xTrain, yTrain)
    residual = yTest - wineRidgeModel.predict(xTest)
    # ||residual||_2 / sqrt(n) is the root-mean-square error.
    rmsError.append(numpy.linalg.norm(residual, 2) / sqrt(len(yTest)))

print("RMS Error alpha")
for rmsValue, alphaValue in zip(rmsError, alphaList):
    print(rmsValue, alphaValue)
画图看下RMSE随参数alpha(alphaList中的各个取值)的变化情况
# Plot RMSE against the position of each alpha in alphaList.
x = range(len(rmsError))
plt.plot(x, rmsError, 'k')
plt.xlabel('-log(alpha)')
plt.ylabel('Error (RMS)')
plt.show()

# Pick the alpha with the smallest RMSE, refit with it and predict on the test set.
indexBest = rmsError.index(min(rmsError))
alph = alphaList[indexBest]
wineRidgeModel = linear_model.Ridge(alpha=alph)
wineRidgeModel.fit(xTrain, yTrain)
errorVector = yTest - wineRidgeModel.predict(xTest)

# Histogram of the test-set prediction errors.
plt.hist(errorVector)
plt.xlabel("Bin Boundaries")
plt.ylabel("Counts")
plt.show()

# Reuse the hand-rolled RMSE/MAE computation on the model's errors.
# errorVector already holds actual minus predicted for every test sample,
# so the model is not asked to predict the whole test set again per sample.
error = list(errorVector)

# Squared errors feed the RMSE; absolute errors feed the MAE.
squaredError = [val * val for val in error]
absError = [abs(val) for val in error]

print("RMSE = ", sqrt(sum(squaredError) / len(squaredError)))
print("MAE = ", sum(absError) / len(absError))
image.png
image.png
网友评论