美文网首页
逻辑回归练习

逻辑回归练习

作者: WJD_photo | 来源:发表于2020-11-25 00:03 被阅读0次
    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import cross_val_score
    from sklearn.feature_selection import SelectFromModel
    from sklearn.metrics import accuracy_score
    from sklearn import preprocessing
    import seaborn as sns
    import matplotlib.pyplot as plt
    from imblearn.over_sampling import SMOTE
    
    # Print the summary produced by the frame's .info() method.
    # NOTE(review): `bankdata` is never assigned in the visible snippet --
    # presumably it is a pandas DataFrame loaded before this line; confirm.
    bankdata.info()
    
    微信截图_20201125002401.png
    # Correlation heat map of the feature columns (target column excluded).
    # Numeric/bool columns are selected explicitly: some columns are presumably
    # still raw categories here (they are label-encoded further down), and
    # pandas >= 2.0 no longer drops non-numeric columns silently in corr().
    features = bankdata.drop(['CHURN_CUST_IND'], axis=1)
    c = features.select_dtypes(include=['number', 'bool']).corr().round(1)
    fig, ax = plt.subplots(figsize=(20, 20))
    sns.heatmap(c, annot=True, vmax=1, square=True, cmap="YlGnBu", ax=ax)
    plt.show()
    
    微信截图_20201125002420.png
    # Remove the listed columns from the frame, then re-draw the correlation
    # heat map on what is left.
    dropped_columns = [
        'LOCAL_FIX_MON_AVG_BAL_PROP', 'LOCAL_CUR_MON_AVG_BAL',
        'LOCAL_BELONEYR_FF_MON_AVG_BAL', 'LOCAL_FIX_MON_AVG_BAL',
        'LOCAL_SAV_SLOPE', 'LOCAL_SAV_CUR_ALL_BAL', 'LOCAL_SAV_MON_AVG_BAL',
        'SAV_CUR_ALL_BAL', 'SAV_MON_AVG_BAL', 'ASSET_MON_AVG_BAL',
        'LOCAL_CUR_TRANS_TX_AMT', 'ATM_ACCT_TX_NUM', 'COUNTER_ALL_TX_NUM',
        'TELEBANK_ALL_TX_NUM',
    ]
    bd = bankdata.drop(dropped_columns, axis=1)

    # Numeric/bool columns are selected explicitly: some columns are presumably
    # still raw categories here (they are label-encoded further down), and
    # pandas >= 2.0 no longer drops non-numeric columns silently in corr().
    c = bd.select_dtypes(include=['number', 'bool']).corr().round(1)
    fig, ax = plt.subplots(figsize=(20, 20))
    sns.heatmap(c, annot=True, vmax=1, square=True, cmap="YlGnBu", ax=ax)
    plt.show()
    
    微信截图_20201125002430.png
    # Integer-encode the three categorical columns in place. The encoder is
    # re-fitted on every column, so one instance can be shared.
    enc = preprocessing.LabelEncoder()
    for col in ['GENDER_CD', 'HASNT_HOME_ADDRESS_INF', 'HASNT_MOBILE_TEL_NUM_INF']:
        bd[col] = enc.fit_transform(bd[col])

    # Feature matrix: drop the customer id AND the target. Leaving
    # CHURN_CUST_IND in x (as the original did) leaks the label into the
    # features and makes every accuracy figure below meaningless.
    x = bd.drop(['CUST_ID', 'CHURN_CUST_IND'], axis=1).values
    y = bd['CHURN_CUST_IND']
    
    # Oversample the minority class with SMOTE.
    # fit_resample replaces fit_sample, a deprecated alias that newer
    # imbalanced-learn releases no longer provide.
    # NOTE(review): resampling happens before the train/test split below, so
    # synthetic samples can end up in the test set -- confirm this is intended.
    sm = SMOTE(random_state=42)
    x1, y1 = sm.fit_resample(x, y)
    
    # Tune C: coarse scan over 9 evenly spaced values on the oversampled data,
    # recording train and test accuracy for each value.
    l2 = []
    l2test = []
    xtrain, xtest, ytrain, ytest = train_test_split(
        x1, y1, test_size=0.3, random_state=420)

    n1 = np.linspace(0.1, 10, 9)
    for c_value in n1:
        fitted = LR(penalty='l2', solver='lbfgs', C=c_value,
                    max_iter=10000).fit(xtrain, ytrain)
        l2.append(accuracy_score(fitted.predict(xtrain), ytrain))
        l2test.append(accuracy_score(fitted.predict(xtest), ytest))

    # One curve per score list: train in red, test in green.
    curves = zip((l2, l2test), ('red', 'green'), ('l2', 'l2test'))
    for scores, line_colour, line_label in curves:
        plt.plot(n1, scores, line_colour, label=line_label)
    plt.legend(loc=4)
    plt.show()
    
    微信截图_20201125002504.png
    # Fine scan of C over 19 evenly spaced values in [3, 5].
    # NOTE(review): this split uses the original x, y, whereas the coarse
    # scan used the oversampled x1, y1 -- confirm which one is intended.
    l2 = []
    l2test = []
    xtrain, xtest, ytrain, ytest = train_test_split(
        x, y, test_size=0.3, random_state=420)

    n2 = np.linspace(3, 5, 19)
    for c_value in n2:
        fitted = LR(penalty='l2', solver='lbfgs', C=c_value,
                    max_iter=10000).fit(xtrain, ytrain)
        l2.append(accuracy_score(fitted.predict(xtrain), ytrain))
        l2test.append(accuracy_score(fitted.predict(xtest), ytest))

    # One curve per score list: train in red, test in green.
    curves = zip((l2, l2test), ('red', 'green'), ('l2', 'l2test'))
    for scores, line_colour, line_label in curves:
        plt.plot(n2, scores, line_colour, label=line_label)
    plt.legend(loc=4)
    plt.show()
    
    微信截图_20201125002513.png
    # Tune max_iter
    # Use the C with the best test accuracy from the fine C scan above; it must
    # be read before l2test is reset. The original passed `c` here, which is
    # the rounded correlation DataFrame from the heat map, not a number.
    # NOTE(review): selecting on test accuracy mirrors reading the plot by
    # eye; a separate validation split would be cleaner.
    best_c = float(n2[int(np.argmax(l2test))])

    # Coarse scan of the iteration cap.
    l2 = []
    l2test = []
    coarse_iters = np.arange(1, 10000, 500)
    for i in coarse_iters:
        lrl2 = LR(penalty='l2', solver='lbfgs', C=best_c, max_iter=int(i))
        lr2 = lrl2.fit(xtrain, ytrain)
        l2.append(accuracy_score(lr2.predict(xtrain), ytrain))
        l2test.append(accuracy_score(lr2.predict(xtest), ytest))

    for scores, colour, name in zip((l2, l2test), ('red', 'green'), ('l2', 'l2test')):
        plt.plot(coarse_iters, scores, colour, label=name)
    plt.legend(loc=4)
    plt.show()
    
    微信截图_20201125002521.png
    # Fine scan of the iteration cap in [2500, 4000) with step 50.
    l2 = []
    l2test = []
    fine_iters = np.arange(2500, 4000, 50)
    for i in fine_iters:
        lrl2 = LR(penalty='l2', solver='lbfgs', C=best_c, max_iter=int(i))
        lr2 = lrl2.fit(xtrain, ytrain)
        l2.append(accuracy_score(lr2.predict(xtrain), ytrain))
        l2test.append(accuracy_score(lr2.predict(xtest), ytest))

    # Keep the cap with the best test accuracy for the final model; the
    # original referenced an undefined name `max_iter` instead.
    best_iter = int(fine_iters[int(np.argmax(l2test))])

    for scores, colour, name in zip((l2, l2test), ('red', 'green'), ('l2', 'l2test')):
        plt.plot(fine_iters, scores, colour, label=name)
    plt.legend(loc=4)
    plt.show()
    
    微信截图_20201125002529.png
    # Final model: fit on the training split (the original fitted on the test
    # split and then scored that same data).
    lrF = LR(penalty='l2', solver='lbfgs', C=best_c, max_iter=best_iter)
    lrF = lrF.fit(xtrain, ytrain)
    # Predicted class probabilities for the test set
    lrF.predict_proba(xtest)
    
    微信截图_20201125003021.png

    相关文章

      网友评论

          本文标题:逻辑回归练习

          本文链接:https://www.haomeiwen.com/subject/nkkgiktx.html