-
案例
数据: 工资和年龄(2个特征)
目标:预测银行会贷款给我多少钱(标签)
考虑:工资和年龄都会影响最终银行贷款的结果,那么它们各自有多大的影响呢?(参数)
1-1 -
通俗解释
- X1,X2就是我们的两个特征(年龄,工资),Y是银行最终会借给我们多少钱
-
找到最合适的一条线(在高维空间中可以想象成一个平面或超平面)来最好地拟合我们的数据点
2-1
-
数学
3-1
3-2
3-3
3-4
3-5
3-6
3-7
- 完整代码实现(使用sklearn中的数据集)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
class LinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    The weights are computed in closed form. After ``fit`` has run,
    ``self.w`` holds the intercept at index 0 followed by one coefficient
    per feature column.
    """

    def __init__(self):
        # Weight vector; stays None until fit() has been called.
        self.w = None

    def fit(self, X, y):
        """Estimate the weights from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of targets with n_samples entries along axis 0.
        """
        # Prepend a column of ones so that w[0] acts as the bias/intercept.
        X = np.insert(X, 0, 1, axis=1)
        # The Moore-Penrose pseudo-inverse equals inv(X^T X) X^T whenever
        # X^T X is invertible, but it also yields a (minimum-norm)
        # least-squares solution when features are collinear or constant,
        # where an explicit inverse would fail or be numerically meaningless.
        self.w = np.linalg.pinv(X).dot(y)

    def predict(self, X):
        """Return predictions for the samples in ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), with the
                same number of features that was passed to ``fit``.

        Returns:
            The product of the bias-augmented ``X`` with the fitted weights.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.w is None:
            raise RuntimeError(
                "LinearRegression must be fitted before calling predict()"
            )
        # Insert constant ones for the bias weight, mirroring fit().
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, subtracted element-wise from ``y_true``.
    """
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))
def main():
    """Fit the regressor on one diabetes feature, report the error and plot the fit."""
    dataset = datasets.load_diabetes()

    # Keep only feature column 2, shaped as an (n_samples, 1) matrix.
    X = dataset.data[:, np.newaxis, 2]
    print(X.shape)

    # The last `holdout` samples are reserved for testing; the rest train.
    holdout = 20
    x_train, y_train = X[:-holdout], dataset.target[:-holdout]
    x_test, y_test = X[-holdout:], dataset.target[-holdout:]

    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Scatter the held-out points and draw the fitted line over them.
    feature = x_test[:, 0]
    plt.scatter(feature, y_test, color='black')
    plt.plot(feature, y_pred, color='blue', linewidth=3)
    plt.show()
# Run the demo only when executed as a script, so that importing this module
# does not load the dataset or open a plot window as a side effect.
if __name__ == "__main__":
    main()
输出效果图如下:
5-1
网友评论