美文网首页
20. 日月光华 Python数据分析 - 机器学习 - 一元线性回归

20. 日月光华 Python数据分析 - 机器学习 - 一元线性回归

作者: 薛东弗斯 | 来源:发表于2023-07-20 06:29 被阅读0次
    # Notebook setup: NumPy for the data, matplotlib for the plots.
    import numpy as np
    import matplotlib.pyplot as plt
    # IPython/Jupyter magic (not plain Python): render figures inline in the notebook.
    %matplotlib inline
    
    # 20 evenly spaced x values from 0 to 30 (both endpoints appear in the echo below).
    x = np.linspace(0,30,20)
    # y follows the line y = x with standard-normal noise scaled by 3 added to each point.
    # No random seed is set in this snippet, so the y values echoed below are from
    # one particular run and will differ when the cell is re-executed.
    y = x + 3*np.random.randn(20)
    # Bare expression: the notebook echoes the array (recorded output follows as comments).
    x
    # array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
    #         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
    #        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
    #        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])
    
    y
    # array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
    #         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
    #        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
    #        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
    
    # Scatter plot of the noisy samples on a 10x8 figure.
    plt.figure(figsize=(10,8))
    plt.scatter(x,y)
    
    image.png
    # Fit a one-feature linear regression to the (x, y) samples and plot the fitted line.
    from sklearn.linear_model import LinearRegression
    
    # Create the (not yet fitted) model.
    model = LinearRegression()
    # Echo the 1-D input arrays again before reshaping them.
    x
    # array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
    #         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
    #        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
    #        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])
    
    y
    # array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
    #         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
    #        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
    #        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
    
    X = x.reshape(-1,1)      # reshape into a column: as many rows as needed, 1 column
    Y = y.reshape(-1,1)
    # Train on all 20 samples.
    model.fit(X, Y)
    # Predict for x = 40, which lies beyond the 0..30 range of the fitted data.
    model.predict([[40]])
    # array([[44.23825314]])
    
    x
    # array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
    #         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
    #        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
    #        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])
    
    y
    # array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
    #         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
    #        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
    #        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
    
    # Plot the samples together with the model's predictions over 0..45.
    plt.figure(figsize = (6,4))
    plt.scatter(X,Y)
    x1 = np.linspace(0,45).reshape(-1,1)    # column of evenly spaced x values (linspace's default count) for drawing the line
    plt.plot(x1,model.predict(x1))
    
    image.png

    1.准备数据 2.初始化模型 3.训练模型(fit) 4.预测 5.评价

    # Evaluate the fitted line on the data it was trained on, then compare it
    # against a line whose slope is deliberately perturbed.
    len(X)    # number of samples (output not recorded in this transcript)
    Y
    # array([[-2.63866254],
    #       [ 0.59460722],
    #       [-2.71828905],
    #       [ 2.49031593],
    #       [ 8.0935728 ],
    #       [ 3.74772581],
    #       [ 8.38599842],
    #       [ 8.57329823],
    #       [12.38546919],
    #       [18.17834314],
    #       [16.66062374],
    #       [17.78108955],
    #       [23.23214217],
    #       [22.98644755],
    #       [22.57692316],
    #       [24.38489051],
    #       [28.83051157],
    #       [25.62039455],
    #       [32.72456454],
    #       [28.93943828]])
    
    # Predictions for the same X the model was fitted on.
    Y_PRE = model.predict(X)
    np.sum(np.square(Y_PRE - Y))    # loss: sum of squared differences between predictions and observed Y (training data, no separate test set here)
    model.intercept_    # intercept
    # array([-2.4765995])
    
    model.coef_      # slope
    # array([[1.16787132]])
    
    # Same intercept, but with the slope increased by 0.1, for comparison with the fitted line.
    Y_PRE2 = (model.coef_ + 0.1)*X + model.intercept_
    np.sum(np.square(Y_PRE2 - Y))      # loss of the perturbed line: sum of squared differences against the observed Y
    # 165.7419197589115
    

    客观地评价模型:用未参与训练的测试集计算损失

    # Hold-out evaluation: train on the first 15 rows and test on the remaining rows
    # (5 of the 20 samples).
    # NOTE(review): the split is positional and x is in ascending order, so the test
    # rows are the largest-x samples; a shuffled split may be more representative — confirm intent.
    X_train, X_test = X[:15], X[15:]
    Y_train, Y_test = Y[:15], Y[15:]
    X_train
    # array([[ 0.        ],
    #       [ 1.57894737],
    #       [ 3.15789474],
    #       [ 4.73684211],
    #       [ 6.31578947],
    #       [ 7.89473684],
    #       [ 9.47368421],
    #       [11.05263158],
    #       [12.63157895],
    #       [14.21052632],
    #       [15.78947368],
    #       [17.36842105],
    #       [18.94736842],
    #       [20.52631579],
    #       [22.10526316]])
    
    model = LinearRegression()   # initialize a fresh model
    # Fit on the training rows only.
    model.fit(X_train, Y_train)
    # Sum of squared errors on the held-out test rows.
    np.sum(np.square(model.predict(X_test) - Y_test))  
    # 64.39851406470771
    
    model.coef_
    # array([[1.2674295]])
    
    model.intercept_
    # array([-3.31979091])
    
    # The fitted line shifted up by 0.5; its recorded test loss (78.65) is higher
    # than the fitted model's (64.40).
    Y_PRE3 = model.coef_*X_test + model.intercept_ + 0.5
    np.sum(np.square(Y_PRE3 - Y_test))
    # 78.6521397131842
    
    # Plot all samples, the fitted line, and the shifted line (red).
    plt.scatter(X,Y)
    plt.plot(X,model.predict(X))
    plt.plot(X , model.coef_*X + model.intercept_ + 0.5, color = 'r')
    
    image.png

    相关文章

      网友评论

          本文标题:20. 日月光华 Python数据分析 - 机器学习 - 一元线性回归

          本文链接:https://www.haomeiwen.com/subject/mqwaudtx.html