1. What is stochastic gradient descent
(Figure: 随机梯度下降.png)
Advantages: plain gradient descent computes the gradient over all of the samples, while stochastic gradient descent computes it over only a subset. That is not only faster, it also tends to work well, because the randomness of the sampling keeps any single outlier from dominating the update (see the sketch after the figures below).
# Randomly pick 20 of the samples and compute the gradient on them only.
# With a large dataset this is much faster than using every sample,
# and the randomness keeps outliers from dominating the result.
for i in np.random.choice(np.arange(40), size=20):  # 20 samples -> 20 partial derivatives, averaged
    cost, dw, db = self.loss(X[i, 0], y[i, 0])
    cost_ += cost / 20
    dw_ += dw / 20
    db_ += db / 20
(Figure 1: gradient descent update rule, 1-梯度下降更新规则.png)
(Figure 2: stochastic gradient descent, 2-sgd随机梯度下降.png)
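To make the difference concrete, here is a minimal sketch (my own illustration on made-up toy data, not code from the original post) comparing the full-batch gradient with a mini-batch gradient for a linear model:

import numpy as np

# 40 samples, 2 columns (feature plus a ones column for the intercept); assumed toy data
X = np.column_stack([np.linspace(-2, 12, 40), np.ones(40)])
y = 3 * X[:, 0] + 1 + np.random.randn(40)
theta = np.zeros(2)

# full-batch gradient: every one of the 40 samples contributes
grad_full = 2 / 40 * X.T.dot(X.dot(theta) - y)

# stochastic (mini-batch) gradient: only a random subset of 20 samples is used,
# which is cheaper per step and keeps a single outlier from dominating the update
idx = np.random.choice(40, size=20, replace=False)
grad_mini = 2 / 20 * X[idx].T.dot(X[idx].dot(theta) - y[idx])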
2. Derivation of the principle
- Rewrite the model as a matrix multiplication.
- Take the derivative of the loss function with respect to the parameter vector.
- That derivative gives the update rule.
- Stochastic gradient descent: apply the same update rule, but to a random subset of the samples on each step. The formulas are reconstructed in the block right after this list, and the full implementation is given in Section 3 below.
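The equation images from the original did not survive; the reconstruction below is inferred from the gradient actually used in the code (grad = 2/m * X[index].T.dot(X[index].dot(theta) - y[index])), so it should match the author's derivation in substance:

$$\hat{y} = X\theta, \qquad \theta = \begin{pmatrix} w \\ b \end{pmatrix}$$
$$J(\theta) = \frac{1}{m}\,(X\theta - y)^{\top}(X\theta - y)$$
$$\nabla_{\theta} J = \frac{2}{m}\,X^{\top}(X\theta - y)$$
$$\theta \leftarrow \theta - \epsilon\,\nabla_{\theta} J$$

For stochastic gradient descent, X and y above are replaced on every iteration by a randomly drawn mini-batch of m rows.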
3. Code implementation
(1) Stochastic gradient descent
Import the packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
Build the data
X = np.linspace(-2,12,40).reshape(-1,1)
w = np.random.randint(1,9,size = 1)
b = np.random.randint(-5,5,size = 1)
# add Gaussian noise to the targets
y = w*X + b + np.random.randn(40,1)*2
plt.scatter(X,y)
(Figure: scatter plot of the generated training data, output_3_1.png)
Stochastic gradient descent
On every update step, randomly draw 20 of the 40 samples and average their gradients.
# Scalar (non-matrix) implementation: w and b are plain floats
class LinearModel(object):
    def __init__(self):  # initialize the slope and the intercept randomly
        self.w = np.random.randn(1)[0]
        self.b = np.random.randn(1)[0]

    def model(self, x):  # the model: a linear equation in one variable
        return self.w * x + self.b

    def loss(self, x, y):  # squared-error loss for a single sample
        cost = (self.model(x) - y) ** 2  # the smaller the loss, the better
        # two unknowns, so compute both partial derivatives
        d_w = 2 * (self.model(x) - y) * x  # partial derivative w.r.t. the slope w
        d_b = 2 * (self.model(x) - y) * 1  # partial derivative w.r.t. the intercept b
        return cost, d_w, d_b

    def gradient_descent(self, step, d_w, d_b):  # gradient descent update
        self.w -= step * d_w  # move the slope along the negative gradient
        self.b -= step * d_b  # move the intercept along the negative gradient

    def fit(self, X, y):  # fit: hand the data to the model and learn the parameters
        precision = 1e-4  # convergence tolerance
        last_w = self.w + 0.01
        last_b = self.b + 0.01
        count = 0
        while True:
            if count > 2000:  # safety cap on the number of iterations
                break
            # stop when both parameters changed by less than the tolerance
            if (np.abs(self.w - last_w) < precision) & (np.abs(self.b - last_b) < precision):
                break
            # remember the current parameters before updating them
            last_w = self.w
            last_b = self.b
            cost_ = 0
            dw_ = 0
            db_ = 0
            # draw 20 random samples, compute 20 partial derivatives, and average them
            for i in np.random.choice(np.arange(40), size=20):
                cost, dw, db = self.loss(X[i, 0], y[i, 0])
                cost_ += cost / 20
                dw_ += dw / 20
                db_ += db / 20
            self.gradient_descent(0.02, dw_, db_)
            count += 1
        print('----------------------', self.w, self.b)

    def predict(self, X):
        return self.model(X)
Fit with stochastic gradient descent and visualize the result
X_test = np.linspace(-2,12,512).reshape(-1,1)
linear = LinearModel()
linear.fit(X,y)
y_ = linear.predict(X_test)
plt.plot(X_test,y_,color = 'green')
plt.scatter(X,y,color = 'red')
---------------------- 3.840141287097452 -3.106354282077542
(Figure: SGD fit (green line) over the training data (red points), output_8_2.png)
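LinearRegression was imported above but never used; as a quick sanity check (my addition, not part of the original post) the learned slope and intercept can be compared against sklearn's ordinary least-squares fit on the same data:

# Hypothetical check: compare the hand-written SGD result with sklearn's OLS solution.
lr = LinearRegression()
lr.fit(X, y)                                  # X has shape (40, 1), y has shape (40, 1)
print('sklearn  w =', lr.coef_.ravel()[0], ' b =', lr.intercept_[0])
print('our SGD  w =', linear.w, ' b =', linear.b)

Both pairs should be close to the w and b used to generate the data.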
(2) Stochastic gradient descent with matrices
import numpy as np
import matplotlib.pyplot as plt
X = np.linspace(-2,12,40).reshape(-1,1)
w = np.random.randint(2,12,size = 1)
b = np.random.randint(-10,10,size = 1)
y = X*w + b + np.random.randn(40,1)*2.5
# flatten y to one dimension with reshape(-1)
y = y.reshape(-1)
plt.scatter(X,y,color = 'red')
(Figure: scatter plot of the generated training data, output_1_1.png)
Implement gradient descent as a function
m is the number of samples drawn for each gradient step (the mini-batch size)
Append a column of ones to X; this extra column corresponds to the intercept (a quick check of this follows the array printout below).
# training data: append a column of ones for the intercept
X_train = np.concatenate([X,np.ones(shape = (40,1))],axis = 1)
X_train
array([[-2. , 1. ],
[-1.64102564, 1. ],
[-1.28205128, 1. ],
[-0.92307692, 1. ],
[-0.56410256, 1. ],
[-0.20512821, 1. ],
[ 0.15384615, 1. ],
[ 0.51282051, 1. ],
[ 0.87179487, 1. ],
[ 1.23076923, 1. ],
[ 1.58974359, 1. ],
[ 1.94871795, 1. ],
[ 2.30769231, 1. ],
[ 2.66666667, 1. ],
[ 3.02564103, 1. ],
[ 3.38461538, 1. ],
[ 3.74358974, 1. ],
[ 4.1025641 , 1. ],
[ 4.46153846, 1. ],
[ 4.82051282, 1. ],
[ 5.17948718, 1. ],
[ 5.53846154, 1. ],
[ 5.8974359 , 1. ],
[ 6.25641026, 1. ],
[ 6.61538462, 1. ],
[ 6.97435897, 1. ],
[ 7.33333333, 1. ],
[ 7.69230769, 1. ],
[ 8.05128205, 1. ],
[ 8.41025641, 1. ],
[ 8.76923077, 1. ],
[ 9.12820513, 1. ],
[ 9.48717949, 1. ],
[ 9.84615385, 1. ],
[10.20512821, 1. ],
[10.56410256, 1. ],
[10.92307692, 1. ],
[11.28205128, 1. ],
[11.64102564, 1. ],
[12. , 1. ]])
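With the ones column in place, the slope and intercept can be bundled into a single vector theta = (w, b), so the whole model reduces to one matrix product. A quick illustrative check (hypothetical values, not in the original notebook):

# With the ones column, a single matrix product X_train @ theta reproduces w*x + b for every sample.
theta_demo = np.array([3.0, 1.5])              # hypothetical w = 3.0, b = 1.5
pred_matrix = X_train.dot(theta_demo)          # shape (40,)
pred_scalar = 3.0 * X_train[:, 0] + 1.5
print(np.allclose(pred_matrix, pred_scalar))   # True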
Gradient descent driven by the matrix-form gradient
When initializing the coefficients, the shape must be considered: theta needs one entry per column of X_train.
def gradient_descent(X, y):
    m = 10  # randomly draw 10 of the 40 samples for each gradient estimate
    theta = np.random.randn(2)  # theta holds both the slope and the intercept
    last_theta = theta + 0.1  # used to measure how much theta changed in the last step
    precision = 1e-4  # convergence tolerance
    epsilon = 0.01  # step size (learning rate)
    count = 0
    while True:
        # stop when both the slope and the intercept changed by less than 1e-4
        if (np.abs(theta - last_theta) < precision).all():
            break
        if count > 3000:  # safety cap: at most 3000 iterations
            break
        # remember the current theta before updating it
        last_theta = theta.copy()
        # stochastic gradient: computed from a random mini-batch via matrix products
        index = np.random.choice(np.arange(40), size=m)  # random indices into the data
        grad = 2 / m * X[index].T.dot(X[index].dot(theta) - y[index])
        theta -= epsilon * grad
        count += 1
    return theta
w_,b_ = gradient_descent(X_train,y)
j = lambda x : w_*x + b_
plt.scatter(X[:,0],y,color = 'red')
x_test = np.linspace(-2,12,1024)
y_ = j(x_test)
plt.plot(x_test,y_,color = 'green')
(Figure: fitted line (green) over the training data (red), output_10_1.png)
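As a final sanity check (my addition, not part of the original post), the SGD solution can be compared with the closed-form least-squares answer, which the (2/m)·Xᵀ(Xθ − y) gradient converges toward:

# Closed-form least-squares solution for the same augmented design matrix.
theta_ls, *_ = np.linalg.lstsq(X_train, y, rcond=None)
print('least squares  w, b =', theta_ls)
print('SGD            w, b =', w_, b_)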