美文网首页Python
Python机器学习3

Python机器学习3

作者: 西萌XXX | 来源:发表于2021-05-06 16:30 被阅读0次

    记录一些常用的性能评价指标

    预测模型（均方根误差(RMSE)以及平均绝对误差(MAE)）
    from math import sqrt

    # Hand-made ground-truth values and model predictions for the demo
    target = [1.5, 2.1, 3.3, -4.7, -2.3, 0.75]
    prediction = [0.5, 1.5, 2.1, -2.2, 0.1, -0.5]

    # Residual (actual - predicted) for every sample, paired element-wise
    error = [actual - predicted for actual, predicted in zip(target, prediction)]

    # Squared residuals feed the RMSE, absolute residuals feed the MAE
    squaredError = [val * val for val in error]
    absError = [abs(val) for val in error]

    # RMSE = sqrt(mean of squared residuals); MAE = mean of absolute residuals
    print("RMSE = ", sqrt(sum(squaredError)/len(squaredError)))
    print("MAE = ", sum(absError)/len(absError))
    
    来个回归预测评估的例子
    import urllib.request
    import numpy
    from sklearn import  linear_model
    from math import sqrt
    import matplotlib.pyplot as plt
     
    # Read the red-wine quality data set (semicolon-separated text) from the web
    target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv")

    xList = []    # feature rows, one list of floats per sample
    labels = []   # last column of every data row, as float
    names = []    # column names taken from the first line
    # The context manager closes the HTTP response when parsing finishes or fails
    with urllib.request.urlopen(target_url) as data:
        for lineNumber, line in enumerate(data):
            text = str(line, encoding='utf-8').strip()   ## decode the raw bytes as utf-8
            if not text:
                continue                                 ## tolerate blank lines instead of failing in float('')
            row = text.split(";")
            if lineNumber == 0:
                names = row
            else:
                labels.append(float(row[-1]))                    ## the last column is the label
                xList.append([float(num) for num in row[:-1]])   ## remaining columns are the features
    
    ## Train/test split: samples whose index is divisible by 3 form the test set
    indices = range(len(xList))
    heldOut = [i % 3 == 0 for i in indices]
    xListTest = [row for row, isTest in zip(xList, heldOut) if isTest]
    xListTrain = [row for row, isTest in zip(xList, heldOut) if not isTest]
    labelsTest = [label for label, isTest in zip(labels, heldOut) if isTest]
    labelsTrain = [label for label, isTest in zip(labels, heldOut) if not isTest]

    ## Convert to numpy arrays for the numeric work that follows
    xTrain = numpy.array(xListTrain)
    yTrain = numpy.array(labelsTrain)
    xTest = numpy.array(xListTest)
    yTest = numpy.array(labelsTest)
    
    ## Candidate ridge penalties: 0.1**0, 0.1**1, ..., 0.1**6
    alphaList = [0.1**i for i in range(7)]

    ## Fit one ridge model per alpha and record its RMS error on the test set
    rmsError = []
    for alph in alphaList:
        wineRidgeModel = linear_model.Ridge(alpha=alph)
        wineRidgeModel.fit(xTrain, yTrain)
        residuals = yTest - wineRidgeModel.predict(xTest)
        # 2-norm of the residuals divided by sqrt(n) is the root-mean-square error
        rmsError.append(numpy.linalg.norm(residuals, 2) / sqrt(len(yTest)))

    print("RMS Error alpha")
    for rms, penalty in zip(rmsError, alphaList):
        print(rms, penalty)
    
    画图看下RMSE随参数alpha的变化情况
    ## Plot RMS error against each alpha's position in alphaList
    ## (alphaList is built as 0.1**i, so position i corresponds to -log10(alpha))
    x = range(len(rmsError))
    plt.plot(x, rmsError, 'k')
    plt.ylabel('Error (RMS)')
    plt.xlabel('-log(alpha)')
    plt.show()

    ## Refit with the alpha that produced the smallest RMS error,
    ## then show a histogram of the test-set residuals
    lowestError = min(rmsError)
    indexBest = rmsError.index(lowestError)
    alph = alphaList[indexBest]
    wineRidgeModel = linear_model.Ridge(alpha=alph).fit(xTrain, yTrain)
    testPrediction = wineRidgeModel.predict(xTest)
    errorVector = yTest - testPrediction
    plt.hist(errorVector)
    plt.ylabel("Counts")
    plt.xlabel("Bin Boundaries")
    plt.show()
    
    ## Apply the same RMSE/MAE computation to the test-set residuals of the fitted model
    # Predict once up front: calling predict(xTest) inside the per-sample loop
    # re-scored the entire test set for every single sample.
    testPrediction = wineRidgeModel.predict(xTest)
    error = [actual - predicted for actual, predicted in zip(yTest, testPrediction)]

    ## Squared residuals feed the RMSE, absolute residuals feed the MAE
    squaredError = [val * val for val in error]
    absError = [abs(val) for val in error]

    print("RMSE = ", sqrt(sum(squaredError)/len(squaredError)))
    print("MAE = ", sum(absError)/len(absError))
    
    
    image.png image.png

    相关文章

      网友评论

        本文标题:Python机器学习3

        本文链接:https://www.haomeiwen.com/subject/xhhddltx.html