画出 ROC 曲线的过程,以鸢尾花数据集为例
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
# Load the iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
data, target = iris.data, iris.target
# Keep only the samples whose label is not 2, turning the task into a
# binary classification problem.
cond = target != 2
x = data[cond]
y = target[cond]
# Deliberately append 800 columns of Gaussian noise to the features
# (presumably so the classifier is imperfect and the ROC curves are not
# trivial). The row count is taken from x instead of being hard-coded so the
# two arrays always line up along axis 0.
x = np.concatenate((x, np.random.randn(x.shape[0], 800)), axis=1)
logistic = LogisticRegression()
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import StratifiedKFold

# Plot one ROC curve per cross-validation fold, labelled with its AUC.
# StratifiedKFold splits the data into 6 folds.
skfold = StratifiedKFold(6)
# split() yields index arrays for the training and test samples of each fold;
# folds are numbered from 1 for the legend.
for i, (train, test) in enumerate(skfold.split(x, y), start=1):
    logistic.fit(x[train], y[train])
    # predict_proba returns one probability column per class; column 1 is
    # the probability of the positive class (label 1).
    y_ = logistic.predict_proba(x[test])
    # roc_curve returns the false-positive rates, true-positive rates and
    # the decision thresholds they were computed at (thresholds are unused).
    fpr, tpr, _thresholds = roc_curve(y[test], y_[:, 1])
    auc_ = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='skfold {} auc:{}'.format(i, auc_))
plt.legend()
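真阳率(TPR):实际为正的样本中,被预测为正样本的比例,即 TP / (TP + FN)
假阳率(FPR):实际为负的样本中,被错误预测为正样本的比例,即 FP / (FP + TN)
精确率(Precision):所有被预测为正的样本中,实际为正样本的比例,即 TP / (TP + FP)
召回率(Recall):与真阳率相同,即 TP / (TP + FN)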
网友评论