美文网首页
线性回归、交叉验证、最优调参

线性回归、交叉验证、最优调参

作者: ss_6bda | 来源:发表于2018-07-15 00:04 被阅读733次

    一、普通的线性模型

    import numpy as np

    import pandas as pd

    import matplotlib.pyplot as plt

    from sklearn.linear_model import LinearRegression

    from sklearn.model_selection import train_test_split

    from sklearn.preprocessing import StandardScaler

    from sklearn import metrics

    %matplotlib inline

    # Read the advertising data; the first CSV column is used as the row index.
    data = pd.read_csv('Advertising.csv', index_col=0)
    data.head()

    # Features are the first three columns, the target is the fourth.
    # 70% of the rows go to training; random_state pins the shuffle.
    x, y = data.values[:, :3], data.values[:, 3]
    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        train_size=0.7,
        random_state=0,
    )

    # Standardize the features. The scaler is fitted on the training split
    # only and then applied unchanged to both splits.
    sc = StandardScaler().fit(x_train)
    x_train_std = sc.transform(x_train)
    x_test_std = sc.transform(x_test)

    # Fit ordinary least squares on the standardized training data and
    # predict the held-out targets.
    linreg = LinearRegression().fit(x_train_std, y_train)
    y_pred = linreg.predict(x_test_std)

    # Evaluate the linear model on the test split.
    # Mean squared error computed by hand ...
    mse = np.average((y_pred - y_test) ** 2)
    # ... and the same quantity via scikit-learn. Metrics take the true values
    # first and the predictions second; MSE is symmetric, but the order is
    # kept consistent with the other metric calls.
    metrics.mean_squared_error(y_test, y_pred)
    # R^2 is NOT symmetric: true values first, predictions second.
    r2 = metrics.r2_score(y_test, y_pred)
    mse, r2

    #计算R2

    def calculater2(y_pred,y_test):
        """Return the coefficient of determination R^2 = 1 - RSS / TSS.

        Parameters
        ----------
        y_pred : array-like of numbers
            Predicted values.
        y_test : array-like of numbers
            True values, same length as ``y_pred``.

        Returns
        -------
        float
            1 minus the ratio of the residual sum of squares to the total
            sum of squares of ``y_test`` around its mean.

        Notes
        -----
        If every value in ``y_test`` is identical, TSS is zero and the result
        is ``nan`` or ``-inf`` (NumPy division semantics).
        """
        # Coerce to float arrays so plain lists/tuples work as well as ndarrays.
        y_pred = np.asarray(y_pred, dtype=float)
        y_test = np.asarray(y_test, dtype=float)

        RSS = ((y_pred - y_test) ** 2).sum()          # residual sum of squares
        TSS = ((y_test - np.average(y_test)) ** 2).sum()  # total sum of squares
        return 1 - (RSS / TSS)

    # R^2 from the hand-written helper, for comparison with metrics.r2_score.
    calculater2(y_pred, y_test)

    # Overlay the true and predicted test-set values. Each line is labelled
    # and a legend is drawn so the two series can be told apart.
    fig = plt.figure(figsize=(10, 6))
    plt.plot(y_test, label='actual')
    plt.plot(y_pred, label='predicted')
    plt.legend()

    二、加入正则化的模型

    Ridge回归

    from sklearn.linear_model import RidgeCV,LassoCV  #用这个自带交叉验证参数

    from sklearn.model_selection import GridSearchCV  #如果使用RidgeCV就不用GridSearchCV这个API了

    # Choose the ridge penalty with RidgeCV's built-in cross-validation.
    # Candidate strengths: 10 log-spaced values from 10**-3 to 10**2
    # (a geometric progression, not an arithmetic one).
    alpha = np.logspace(-3, 2, 10)

    # Pass the candidates by keyword, matching the LassoCV call in this file.
    ridge = RidgeCV(alphas=alpha, cv=5)
    ridge.fit(x_train_std, y_train)
    ridge.alpha_  # the regularization strength selected by cross-validation

    # Alternative: tune a plain Ridge estimator with GridSearchCV over the
    # same candidate alphas.
    from sklearn.linear_model import Ridge, Lasso

    ridge_model = GridSearchCV(
        estimator=Ridge(),
        param_grid={'alpha': alpha},
        cv=5,
    )
    ridge_model.fit(x_train_std, y_train)
    ridge_model.best_params_

    # Score the RidgeCV model on the standardized test split.
    y_pred_ridge = ridge.predict(x_test_std)
    mse_ridge, r2_ridge = (
        metrics.mean_squared_error(y_test, y_pred_ridge),
        metrics.r2_score(y_test, y_pred_ridge),
    )
    (mse_ridge, r2_ridge)

    Lasso回归

    # Fit a Lasso model; LassoCV picks alpha from the candidates using
    # cross-validation with cv=5.
    lasso = LassoCV(alphas=alpha, cv=5).fit(x_train_std, y_train)
    lasso.alpha_

    # Score the LassoCV model on the standardized test split.
    y_pred_lasso = lasso.predict(x_test_std)
    mse_lasso, r2_lasso = (
        metrics.mean_squared_error(y_test, y_pred_lasso),
        metrics.r2_score(y_test, y_pred_lasso),
    )
    (mse_lasso, r2_lasso)

    相关文章

      网友评论

          本文标题:线性回归、交叉验证、最优调参

          本文链接:https://www.haomeiwen.com/subject/tnuupftx.html