三维梯度下降的例子
步骤:
- 创建三维数据
- 损失函数计算方式
- 循环法计算损失函数值
- 绘制损失函数值的三维图
- 偏导法使梯度下降
- 梯度逐渐下降,满足停止条件时结束
- 绘制损失函数下降趋势的散点图
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# 0 Demo: build and display the surface z = x^2 + y^2 on a grid via np.meshgrid()
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
# Coordinate matrices: X[i, j] = x[j], Y[i, j] = y[i]
X, Y = np.meshgrid(x, y)
Z = X ** 2 + Y ** 2
fig = plt.figure()
# BUG FIX: fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and
# removed in 3.6 — create the 3D axes with add_subplot instead.
ax = fig.add_subplot(projection = '3d')
# BUG FIX: the original called plt.show() without plotting anything, so the
# "3D mesh" window came up empty; draw the surface first.
ax.plot_surface(X, Y, Z)
plt.show()
# 1 Grid of (theta0, theta1) candidates for the cost surface below
theta0s = np.linspace(-2, 2, 100)
theta1s = np.linspace(-2, 2, 100)
# np.empty leaves the array uninitialized (arbitrary memory contents);
# every cell is filled by the nested loop in section 3
COST = np.empty(shape = [100, 100])
TOS, TIS = np.meshgrid(theta0s, theta1s)
# 2 损失函数计算方式
def cost(theta0, theta1, x, y):
    """Mean-squared-error cost of the linear model h = theta0 + theta1 * x.

    Parameters:
        theta0: intercept of the linear model.
        theta1: slope of the linear model.
        x: sample inputs (array-like of length m).
        y: sample targets (array-like of length m).

    Returns:
        float: J = sum((h_i - y_i)^2) / (2 * m).

    Raises:
        ZeroDivisionError: if x is empty (same as the original loop version).
    """
    # IDIOM/PERF FIX: replaced the per-sample Python loop with a single
    # vectorized NumPy expression; results are identical.
    x = np.asarray(x)
    y = np.asarray(y)
    m = x.shape[0]
    residuals = theta0 + theta1 * x - y
    return float(np.sum(residuals ** 2) / (2 * m))
# 3 Fill the cost surface with a (slow) nested Python loop
# NOTE(review): `pga` is never defined in this file — presumably a dataset with
# `distance` and `accuracy` columns loaded in an omitted snippet; confirm.
for i in range(100):
    for j in range(100):
        # BUG FIX: the original used cost(TOS[0, i], TIS[j, 0], ...), which
        # filled COST transposed relative to the (TOS, TIS) mesh passed to
        # plot_surface below; indexing both meshes at [i, j] keeps Z aligned
        # with X and Y.
        COST[i, j] = cost(TOS[i, j], TIS[i, j], pga.distance, pga.accuracy)
# 4 Plot the cost surface in 3D
fig2 = plt.figure()
# BUG FIX: fig.gca(projection='3d') was removed in Matplotlib 3.6;
# use add_subplot instead.
ax = fig2.add_subplot(projection = '3d')
ax.plot_surface(X = TOS, Y = TIS, Z = COST)
plt.show()
# 5 偏导法使梯度下降
# theta1的偏导
def partialCostTheta1(theta0, theta1, x, y):
    """Partial derivative of the MSE cost with respect to theta1.

    Parameters:
        theta0: intercept of the linear model.
        theta1: slope of the linear model.
        x: sample inputs (1-D numpy array of length m).
        y: sample targets (1-D numpy array of length m).

    Returns:
        d(cost)/d(theta1) = sum((h_i - y_i) * x_i) / m.
    """
    predictions = theta0 + theta1 * x
    weighted_residuals = (predictions - y) * x
    # Average the per-sample contributions over the m samples.
    return weighted_residuals.sum() / x.shape[0]
# Example: partial derivative w.r.t. theta1 at (theta0=0, theta1=5).
# NOTE(review): `pga` is never defined in this file — presumably a dataset
# with `distance` and `accuracy` columns loaded elsewhere; confirm.
partial = partialCostTheta1(0, 5, pga.distance, pga.accuracy)
# print('theta1 partial derivative:', partial)
# theta0的偏导
def partialCostTheta0(theta0, theta1, x, y):
    """Partial derivative of the MSE cost with respect to theta0.

    Parameters:
        theta0: intercept of the linear model.
        theta1: slope of the linear model.
        x: sample inputs (1-D numpy array of length m).
        y: sample targets (1-D numpy array of length m).

    Returns:
        d(cost)/d(theta0) = sum(h_i - y_i) / m.
    """
    errors = (theta0 + theta1 * x) - y
    # Average the per-sample errors over the m samples.
    return errors.sum() / x.shape[0]
# Example: partial derivative w.r.t. theta0 at (theta0=1, theta1=1).
# NOTE(review): relies on the undefined `pga` dataset — see note above loop usage; confirm.
partial0 = partialCostTheta0(1, 1, pga.distance, pga.accuracy)
# 6 梯度逐渐下降,满足停止条件时候结束
def gradientDescent(x, y, alpha = 0.1, theta0 = 0, theta1 = 0):
    """Fit theta0/theta1 by batch gradient descent on the MSE cost.

    Parameters:
        x: sample inputs (1-D numpy array / pandas Series).
        y: sample targets, same length as x.
        alpha: learning rate (default 0.1).
        theta0: initial intercept (default 0).
        theta1: initial slope (default 0).

    Returns:
        dict with keys 'theta0' (final intercept), 'theta1' (final slope)
        and 'costs' (cost value per epoch, starting with the initial cost).
    """
    maxEpoch = 1000
    counter = 0
    # BUG FIX: the original called cost(theta1, theta0, pga.distance,
    # pga.accuracy) — the theta arguments were swapped against the cost()
    # signature, and the function's own x/y parameters were ignored in
    # favor of the global pga dataset.
    c = cost(theta0, theta1, x, y)
    costs = [c]
    # Stop once an epoch improves the cost by less than this threshold.
    convergence_thres = 0.000001
    # Seed cprev above c so the loop condition passes on the first epoch.
    cprev = c + 10
    theta0s = [theta0]
    theta1s = [theta1]
    # Iterate until converged or the epoch budget is exhausted.
    while ((cprev - c) > convergence_thres) and (counter < maxEpoch):
        cprev = c
        update0 = alpha * partialCostTheta0(theta0, theta1, x, y)
        update1 = alpha * partialCostTheta1(theta0, theta1, x, y)
        theta0 -= update0
        theta1 -= update1
        # Record the parameter trajectory for inspection/plotting.
        theta0s.append(theta0)
        theta1s.append(theta1)
        # BUG FIX: same as above — evaluate the cost on x/y, not on the
        # global pga dataset.
        c = cost(theta0, theta1, x, y)
        costs.append(c)
        counter += 1
    return {'theta0': theta0, 'theta1': theta1, 'costs': costs}
# Run gradient descent with the default learning rate, then a smaller one.
# NOTE(review): relies on the undefined `pga` dataset — confirm where it is loaded.
obj = gradientDescent(pga.distance, pga.accuracy)
print('Theta0 = ', obj['theta0'])
print('Theta1 = ', obj['theta1'])
descend = gradientDescent(pga.distance, pga.accuracy, alpha = .01)
# 7 Scatter plot of the cost value at each epoch to show the descent trend
plt.scatter(range(len(descend['costs'])), descend['costs'])
# BUG FIX: axis-label typo 'apoch' -> 'epoch'
plt.xlabel('epoch')
plt.ylabel('costs')
plt.show()
结果如下:


网友评论