- Himmelblau function
This function is often used to test how well a gradient-descent algorithm performs on a 2-dimensional loss surface, because it has four known local minima, all of which are also global minima (f = 0 at each).
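Written out, the function defined in the code below is

f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2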
- Minima: f(3.0, 2.0) = 0, f(-2.805118, 3.131312) = 0, f(-3.779310, -3.283186) = 0, f(3.584428, -1.848126) = 0
from matplotlib import pyplot as plt
import torch
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older Matplotlib

def himmelblau(x):
    # f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2
    return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2

# sample the [-6, 6] x [-6, 6] region on a 0.1 grid
x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

# 3D surface plot of the function
fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d')  # the original fig.gca(projection='3d') was removed in Matplotlib 3.6
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
x,y range: (120,) (120,)
X,Y maps: (120, 120) (120, 120)
(3D surface plot of the Himmelblau function, showing its four basins)
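As a quick sanity check, the four known minima can be plugged into the himmelblau function defined above. The coordinates below are the standard values rounded to six decimals, so the results are only approximately zero:

# evaluate himmelblau at the four known minima; each value should be ~0
for pt in [(3.0, 2.0),
           (-2.805118, 3.131312),
           (-3.779310, -3.283186),
           (3.584428, -1.848126)]:
    print(pt, himmelblau(pt))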
x = torch.tensor([0.0, 0.0], requires_grad=True)
# define the optimizer
optimizer = torch.optim.Adam([x], lr=1e-3)
for step in range(20000):
    pred = himmelblau(x)
    # zero the gradients
    optimizer.zero_grad()
    # backpropagate to compute the gradient w.r.t. x
    pred.backward()
    # take one gradient-descent (Adam) step
    optimizer.step()
    if step % 2000 == 0:
        print('step{}:x={},f(x) = {}'.format(step, x.tolist(), pred.item()))
step0:x=[0.0009999999310821295, 0.0009999999310821295],f(x) = 170.0
step2000:x=[2.3331806659698486, 1.9540694952011108],f(x) = 13.730916023254395
step4000:x=[2.9820079803466797, 2.0270984172821045],f(x) = 0.014858869835734367
step6000:x=[2.999983549118042, 2.0000221729278564],f(x) = 1.1074007488787174e-08
step8000:x=[2.9999938011169434, 2.0000083446502686],f(x) = 1.5572823031106964e-09
step10000:x=[2.999997854232788, 2.000002861022949],f(x) = 1.8189894035458565e-10
step12000:x=[2.9999992847442627, 2.0000009536743164],f(x) = 1.6370904631912708e-11
step14000:x=[2.999999761581421, 2.000000238418579],f(x) = 1.8189894035458565e-12
step16000:x=[3.0, 2.0],f(x) = 0.0
step18000:x=[3.0, 2.0],f(x) = 0.0
# start from a different initialization: (-1.0, 1.0)
x = torch.tensor([-1.0, 1.0], requires_grad=True)
optimizer = torch.optim.Adam([x], lr=1e-3)
for step in range(20000):
    pred = himmelblau(x)
    # zero the gradients
    optimizer.zero_grad()
    # backpropagate to compute the gradient w.r.t. x
    pred.backward()
    # take one gradient-descent (Adam) step
    optimizer.step()
    if step % 2000 == 0:
        print('step{}:x={},f(x) = {}'.format(step, x.tolist(), pred.item()))
step0:x=[-1.0010000467300415, 1.0010000467300415],f(x) = 130.0
step2000:x=[-2.707580089569092, 2.8148136138916016],f(x) = 3.9272661209106445
step4000:x=[-2.805098533630371, 3.1309926509857178],f(x) = 4.166184226050973e-06
step6000:x=[-2.8051130771636963, 3.1313045024871826],f(x) = 3.322384145576507e-09
step8000:x=[-2.8051161766052246, 3.131309747695923],f(x) = 4.567937139654532e-10
step10000:x=[-2.805117607116699, 3.1313116550445557],f(x) = 3.728928277269006e-11
step12000:x=[-2.8051180839538574, 3.131312131881714],f(x) = 5.6843418860808015e-12
step14000:x=[-2.8051180839538574, 3.131312370300293],f(x) = 2.2737367544323206e-13
step16000:x=[-2.8051180839538574, 3.131312608718872],f(x) = 2.2737367544323206e-13
step18000:x=[-2.8051180839538574, 3.131312608718872],f(x) = 2.2737367544323206e-13
With different initial values the algorithm converges to different local minima: starting from (0, 0) it reaches (3, 2), while starting from (-1, 1) it reaches (-2.805118, 3.131312). This shows that gradient descent depends on the choice of initialization.
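To make this dependence explicit, the same Adam loop can be swept over several starting points; this is a minimal sketch reusing the himmelblau function above, and the list of starting points is my own choice, not from the original post. Which minimum each run reaches depends on the basin its starting point falls in:

# run the same Adam loop from several starting points and report where each lands
for init in [(0.0, 0.0), (-1.0, 1.0), (-4.0, 0.0), (4.0, 0.0)]:
    x = torch.tensor(list(init), requires_grad=True)
    optimizer = torch.optim.Adam([x], lr=1e-3)
    for step in range(20000):
        pred = himmelblau(x)
        optimizer.zero_grad()
        pred.backward()
        optimizer.step()
    print('init={} -> x={}, f(x)={:.3e}'.format(
        init, [round(v, 6) for v in x.tolist()], himmelblau(x).item()))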