美文网首页
python评分卡之LR及混淆矩阵、ROC

python评分卡之LR及混淆矩阵、ROC

作者: 钢能锅 | 来源:发表于2018-12-03 15:27 被阅读0次

    import numpy as np
    import pandas as pd
    from sklearn import linear_model
    from sklearn import model_selection

    # Load the data set (the path is specific to the author's machine).
    sports = pd.read_csv(r'C:\Users\Administrator\Desktop\Run or Walk.csv')

    # Names of the independent variables: every column from position 4 onwards.
    # NOTE(review): assumes the target column `activity` is not among these
    # columns -- confirm against the CSV header, otherwise the target leaks
    # into the features.
    predictors = sports.columns[4:]

    # Build the feature matrix. `DataFrame.ix` was removed in pandas 1.0;
    # `.loc` is the label-based replacement for this selection.
    X = sports.loc[:, predictors]

    # Extract the target variable.
    y = sports.activity

    # Split into training (75%) and test (25%) sets with a fixed seed so the
    # split is reproducible.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25, random_state=1234)

    # Fit a logistic regression model on the training set.
    sklearn_logistic = linear_model.LogisticRegression()
    sklearn_logistic.fit(X_train, y_train)

    # Show the fitted intercept and coefficients.
    print(sklearn_logistic.intercept_, sklearn_logistic.coef_)

    # Predict class labels for the test set.
    sklearn_predict = sklearn_logistic.predict(X_test)

    # Count how many test samples were assigned to each class. Printed
    # explicitly so the result is visible when run as a script, not only as
    # the last expression of a notebook cell.
    print(pd.Series(sklearn_predict).value_counts())

    -------------------------------------------------------------------------------------------------------------------------------------------

    # Import the third-party module that provides the evaluation metrics.
    from sklearn import metrics

    # Confusion matrix with the class order fixed to [0, 1]
    # (rows are the true labels, columns are the predicted labels).
    cm = metrics.confusion_matrix(y_test, sklearn_predict, labels=[0, 1])
    print(cm)

    # Call the scoring functions through the public `sklearn.metrics`
    # namespace. The original went through the private `metrics.scorer`
    # module, which no longer exists in current scikit-learn releases.
    Accuracy = metrics.accuracy_score(y_test, sklearn_predict)
    # Recall of the positive class (label 1).
    Sensitivity = metrics.recall_score(y_test, sklearn_predict)
    # Recall of the negative class (label 0).
    Specificity = metrics.recall_score(y_test, sklearn_predict, pos_label=0)

    print('模型准确率为%.2f%%:' %(Accuracy*100))
    print('正例覆盖率为%.2f%%' %(Sensitivity*100))
    print('负例覆盖率为%.2f%%' %(Specificity*100))

    -------------------------------------------------------------------------------------------------------------------------------------------

    # Visualise the confusion matrix as an annotated heat map.

    # Third-party plotting modules.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Cell annotations use scientific notation with two decimals, drawn on
    # the green-blue colour map.
    heatmap_options = dict(annot=True, fmt='.2e', cmap='GnBu')
    sns.heatmap(cm, **heatmap_options)

    plt.show()

    ------------------------------------------------------------------------------------------------------------------------------------------

    # Plot the ROC curve.

    # Predicted probability of the positive class for each test sample.
    # The original referenced `sm_y_probability`, which is never defined in
    # this script; the score is taken from the fitted sklearn model instead.
    # NOTE(review): column 1 of predict_proba is the positive class only if
    # the labels are coded 0/1 (as `labels=[0, 1]` above suggests) -- confirm.
    y_score = sklearn_logistic.predict_proba(X_test)[:, 1]

    # False positive rate, true positive rate and the matching thresholds.
    fpr, tpr, threshold = metrics.roc_curve(y_test, y_score)

    # Area under the ROC curve.
    roc_auc = metrics.auc(fpr, tpr)

    # Shade the area under the curve.
    plt.stackplot(fpr, tpr, color='steelblue', alpha = 0.5, edgecolor = 'black')

    # Outline of the curve.
    plt.plot(fpr, tpr, color='black', lw = 1)

    # Diagonal reference line (a classifier with no discriminative power).
    plt.plot([0,1],[0,1], color = 'red', linestyle = '--')

    # Annotate the plot with the AUC value.
    plt.text(0.5,0.3,'ROC curve (area = %0.2f)' % roc_auc)

    # Axis labels.
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')

    plt.show()

    -------------------------------------------------------------------------------------------------------------------------------------------

    # KS curve. Reference: https://www.jianshu.com/p/b1b1344bd99f
    # (Jianshu: risk-control data analysis study notes, part 2 -- building a
    # credit scorecard with Python).

    # Set the figure size when the figure is created. The original called
    # plt.figure(figsize=...) after plotting, which opened a second, empty
    # figure and left the KS plot at the default size.
    fig, ax = plt.subplots(figsize=(20, 20))

    # roc_curve prepends a sentinel threshold (above every score) for the
    # "nothing predicted positive" point; skip it so that 1 - threshold stays
    # on a meaningful scale.
    # The KS curve is drawn in descending order of predicted probability,
    # hence the 1 - threshold mirroring.
    ks_x = 1 - threshold[1:]
    ks_tpr = tpr[1:]
    ks_fpr = fpr[1:]

    ax.plot(ks_x, ks_tpr, label='tpr')
    ax.plot(ks_x, ks_fpr, label='fpr')
    # The KS statistic is the maximum gap between the two curves.
    ax.plot(ks_x, ks_tpr - ks_fpr, label='KS')

    ax.set_xlabel('score')
    ax.set_title('KS Curve')
    ax.set_ylim([0.0, 1.0])

    legend = ax.legend(loc='upper left')

    plt.show()

    相关文章

      网友评论

          本文标题:python评分卡之LR及混淆矩阵、ROC

          本文链接:https://www.haomeiwen.com/subject/frpycqtx.html