image.png
from sklearn.metrics import accuracy_score
# Train the model (fit() returns the fitted estimator itself)
log_model = LogisticRegression().fit(new_x, y)
# Check the accuracy on the same data the model was fitted on
predict_y = log_model.predict(new_x)
accuracy_score(y, predict_y)  # accuracy 0.8135593220338984
# Model coefficients: the0 is the intercept, the1..the5 are the first row of coef_
the0 = log_model.intercept_
the1, the2, the3, the4, the5 = log_model.coef_[0]
print(the0, the1, the2, the3, the4, the5)
# Sort x1
x1_new = x1.sort_values()
# Coefficients of the quadratic a*x2**2 + b*x2 + c = 0, solved for x2 at each x1
a = the4
b = the2 + the5 * x1_new
c = the0 + the1 * x1_new + the3 * x1_new * x1_new
# The two roots of the quadratic are the two branches of the new boundary
sqrt_discriminant = np.sqrt(b * b - 4 * a * c)
x2_new_boundray = (-b + sqrt_discriminant) / (2 * a)
x2_new_boundray2 = (-b - sqrt_discriminant) / (2 * a)
# Boolean mask: True for rows whose "pass" column equals 1
mask = df["pass"] == 1
plt.figure()
# Scatter the two classes separately so each gets its own legend entry
passed = plt.scatter(df.loc[mask, "test1"], df.loc[mask, "test2"])
failed = plt.scatter(df.loc[~mask, "test1"], df.loc[~mask, "test2"])
# Draw both branches of the boundary over the sorted x1 values
t = plt.plot(x1_new, x2_new_boundray)
t2 = plt.plot(x1_new, x2_new_boundray2)
plt.xlabel("test1")
plt.ylabel("test2")
plt.legend((passed, failed), ("passed", "failed"))
plt.show()
image.png
# Wrap the boundary computation in a function
def f(x):
    """Return both roots of the boundary quadratic evaluated at ``x``.

    Reads the module-level coefficients ``the0`` .. ``the5`` and solves
    ``the4*x2**2 + (the2 + the5*x)*x2 + (the0 + the1*x + the3*x*x) = 0``
    for ``x2``.

    :param x: x1 value(s) at which to evaluate the boundary
    :return: tuple ``(root with +sqrt, root with -sqrt)``
    """
    # Quadratic coefficients in x2
    quad_a = the4
    quad_b = the2 + the5 * x
    quad_c = the0 + the1 * x + the3 * x * x
    # Shared pieces of the quadratic formula
    root_term = np.sqrt(quad_b * quad_b - 4 * quad_a * quad_c)
    denominator = 2 * quad_a
    return (-quad_b + root_term) / denominator, (-quad_b - root_term) / denominator
def boundray(model, x):
    """
    Compute both branches of the decision boundary at ``x``.

    Solves ``a*x2**2 + b*x2 + c = 0`` for ``x2`` with
    ``a = the4``, ``b = the2 + the5*x`` and ``c = the0 + the1*x + the3*x*x``,
    where ``the0`` is ``model.intercept_`` and ``the1`` .. ``the5`` are the
    first row of ``model.coef_``.
    NOTE(review): this presumes the model was fitted on the features
    (x1, x2, x1^2, x2^2, x1*x2) in that order -- confirm against the
    feature construction.

    :param model: trained model exposing ``intercept_`` and ``coef_``
    :param x: x1 value(s) at which to evaluate the boundary
    :return: boundray1 boundray2 -- the roots using +sqrt and -sqrt; since
        ``intercept_`` is used as-is, the results carry its array shape
    """
    # Model coefficients
    the0 = model.intercept_
    the1, the2, the3, the4, the5 = model.coef_[0]
    # Coefficients of the quadratic in x2
    a = the4
    b = the2 + the5 * x
    c = the0 + the1 * x + the3 * x * x
    # A negative discriminant yields NaN from np.sqrt (no real boundary there)
    sqrt_discriminant = np.sqrt(b * b - 4 * a * c)
    boundray1 = (-b + sqrt_discriminant) / (2 * a)
    # Previously this root overwrote boundray1, leaving boundray2 undefined
    boundray2 = (-b - sqrt_discriminant) / (2 * a)
    return boundray1, boundray2
# Evaluate both boundary branches at every value of x1_new
x2_new_boundray1 = []
x2_new_boundray2 = []
for x in x1_new:
    # Call boundray once per point and unpack, instead of once per branch
    branch1, branch2 = boundray(log_model, x)
    x2_new_boundray1.append(branch1)
    x2_new_boundray2.append(branch2)
image.png
# Generate data to draw the boundary:
# 19000 x1 values from -0.9 up to 0.9999 in steps of 0.0001
x1_range = np.array([-0.9 + x/10000 for x in range(19000)])
x2_new_boundray1 = []
x2_new_boundray2 = []
for x in x1_range:
    # Call f once per point and unpack, instead of once per branch
    branch1, branch2 = f(x)
    x2_new_boundray1.append(branch1)
    x2_new_boundray2.append(branch2)
image.png
网友评论