
Demonstrating Gradient Descent and Backpropagation with Single-Factor Linear Regression

Author: 456又 | Published 2019-12-25 19:52

A backpropagation example

I. Gradient descent and backpropagation in a single-layer network

The single-factor linear model

  1. Linear model definition

\hat{y}_i=ax_i+b

  2. Loss function
    Loss = \frac{1}{2}\sum_i^n(\hat{y}_i-y_i)^2=\frac{1}{2}\sum_i^n(ax_i+b-y_i)^2

  3. Partial derivatives of the loss (substituting the current value of a into the derivative gives the gradient; a quick autograd check of these formulas follows the code example below)

\frac{\partial Loss}{\partial a} =\frac{\partial \frac{1}{2}\sum_i^n(ax_i+b-y_i)^2}{\partial a}=\sum_i^n(ax_i+b-y_i)x_i \\ \frac{\partial Loss}{\partial b} =\frac{\partial \frac{1}{2}\sum_i^n(ax_i+b-y_i)^2}{\partial b}=\sum_i^n(ax_i+b-y_i)

  4. Update the parameters

    Define a learning rate lr; substituting the gradient gives the updated value of a:
    a=a-lr \cdot \frac{\partial Loss}{\partial a}

  5. Code example

    1. Build the dataset
    import matplotlib.pyplot as plt
    import torch
    from torch.utils.data import TensorDataset, DataLoader
    
    # inputs 1, 3, 5, ..., 99 (50 points); targets from the line y = 2x + 10
    x = torch.arange(1, 100, 2)
    noise = torch.randn(50)
    y = x * 2 + 10
    # y = y + noise
    
    t_data_set = TensorDataset(x, y)
    
    dl = DataLoader(t_data_set, batch_size=5)
    
    # parameters a and b, initialised far from the true values 2 and 10
    a = torch.tensor(20.0, requires_grad=True)
    b = torch.tensor(30.0, requires_grad=True)
    
    2. The epoch loop

      for epoch in range(100):
          all_loss = 0
          for xt, yt in dl:
              # forward pass and loss
              y_pred = a * xt + b
              loss = torch.sum((y_pred - yt) ** 2) / 2
              all_loss += loss.data
              # zero the gradients left over from the previous batch
              if a.grad is not None:
                  a.grad.data.zero_()
                  b.grad.data.zero_()
              # backpropagation
              loss.backward()
              # update the parameters
              a.data = a.data - a.grad.data * 1e-4
              b.data = b.data - b.grad.data * 1e-3
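
As a quick sanity check of the derivative formulas from step 3, the following minimal sketch (assuming the dataset and DataLoader defined above are still in scope) compares the hand-computed gradients \sum_i(ax_i+b-y_i)x_i and \sum_i(ax_i+b-y_i) with what autograd produces for a single batch:

    # fresh copies of the parameters so the check is independent of the training loop above
    a0 = torch.tensor(20.0, requires_grad=True)
    b0 = torch.tensor(30.0, requires_grad=True)
    xt, yt = next(iter(dl))                                   # one batch from the DataLoader above
    loss = torch.sum((a0 * xt + b0 - yt) ** 2) / 2
    loss.backward()
    with torch.no_grad():
        manual_grad_a = torch.sum((a0 * xt + b0 - yt) * xt)   # formula from step 3
        manual_grad_b = torch.sum(a0 * xt + b0 - yt)
    print(a0.grad, manual_grad_a)                             # the two values on each line should match
    print(b0.grad, manual_grad_b)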
      

II. Backpropagation in a multi-layer network

Gradient descent and backpropagation for a single-factor, multi-layer network

  1. Multi-layer linear model definition
    Composite function: \hat{y}_i=a_2(a_1x_i+b_1)+b_2\\ First layer: g(x_i) = a_1x_i+b_1\\ Second layer: f(x_i) = a_2g(x_i)+b_2\\ Note that x_i is treated as a constant here, i.e. g(x_1) and g(x_2) are different functions of the parameters a_1 and b_1

  2. Loss function
    Loss = \frac{1}{2}\sum_i^n(\hat{y}_i-y_i)^2=\frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2=\frac{1}{2}\sum_i^n(a_2(a_1x_i+b_1)+b_2-y_i)^2

  3. First, differentiate with respect to a_2 and b_2
    \frac{\partial Loss}{\partial a_2} =\frac{\partial \frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2}{\partial a_2}=\sum_i^n(a_2g(x_i)+b_2-y_i)g(x_i) \\ \frac{\partial Loss}{\partial b_2} =\frac{\partial \frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2}{\partial b_2}=\sum_i^n(a_2g(x_i)+b_2-y_i)

  4. The chain rule
    \frac{\partial y}{\partial x}=\frac{\partial y}{\partial u}\frac{\partial u}{\partial x}\\ If \mu=\varphi(t) and \nu=\psi(t) are differentiable at t, and z=f(\mu, \nu) has continuous partial derivatives at (\mu, \nu),\\ then the composite function z=f(\varphi(t),\psi(t)) is differentiable at t, with the chain rule:\\ \frac{dz}{dt} =\frac{\partial z}{\partial \mu}\frac{\partial \mu}{\partial t}+\frac{\partial z}{\partial \nu}\frac{\partial \nu}{\partial t}

  5. Differentiate with respect to a_1 and b_1 using the chain rule (a small worked numeric check follows the code example below)
    Loss = \frac{1}{2}\sum_i^n(f(g(x_i))-y_i)^2\\ \frac{\partial Loss}{\partial a_1}=\sum_i^n[(f(g(x_i))-y_i)\frac{\partial f(g(x_i))}{\partial g(x_i)}\frac{\partial g(x_i)}{\partial a_1}]=\sum_i^n(f(g(x_i))-y_i)a_2x_i\\ \frac{\partial Loss}{\partial b_1}=\sum_i^n[(f(g(x_i))-y_i)\frac{\partial f(g(x_i))}{\partial g(x_i)}\frac{\partial g(x_i)}{\partial b_1}]=\sum_i^n(f(g(x_i))-y_i)a_2

  6. Code example

    import seaborn as sns
    import matplotlib.pyplot as plt
    import torch
    from torch.utils.data import TensorDataset, DataLoader
    import pandas as pd
    
    x = torch.arange(1, 100, 2)
    noise = torch.randn(50)
    y = x * 2 + 10
    # y = y + noise
    
    t_data_set = TensorDataset(x, y)
    
    dl = DataLoader(t_data_set, batch_size=5)
    
    # a two-layer (two stacked linear) model
    a1 = torch.tensor(20.0, requires_grad=True)
    b1 = torch.tensor(30.0, requires_grad=True)
    
    a2 = torch.tensor(20.0, requires_grad=True)
    b2 = torch.tensor(30.0, requires_grad=True)
    flag = 0
    for epoch in range(1):
        all_loss = 0
        for xt, yt in dl:
            # forward pass and loss
            liner1 = a1 * xt + b1
            y_pred = a2 * liner1 + b2
            loss = torch.sum((y_pred - yt) ** 2) / 2
            all_loss += loss.data
            # zero the gradients (skipped on the very first batch, when .grad is still None)
            if flag != 0:
                a1.grad.data.zero_()
                b1.grad.data.zero_()
                a2.grad.data.zero_()
                b2.grad.data.zero_()
            else:
                flag = 1
            loss.backward()
            print(f"a1:{a1.data}, a2:{a2.data}, b1:{b1.data},b2:{b2.data}")
            print(f"自动计算的梯度:a1_grad:{a1.grad}, a2_grad:{a2.grad}, b1_grad:{b1.grad},b_grad:{b2.grad}")
            print(f"x: {xt}, y: {yt}, 第一层结果:{liner1}, 第二层结果:{y_pred}")
            print(f"计算对a2梯度:{torch.sum(torch.mul(y_pred - yt, liner1))}")
            print(f"loss对liner1的梯度:{a2 * (y_pred - yt)}")
            print(f"liner1对a1的梯度:{xt}")
            print(f"loss对a1的梯度:{torch.sum(torch.mul(xt, a2 * (y_pred - yt)))}")
            print(f"loss对b1的梯度:{torch.sum(a2 * (y_pred - yt))}")
    
            # update the parameters
            a1.data = a1.data - a1.grad.data * 1e-4
            b1.data = b1.data - b1.grad.data * 1e-3
            a2.data = a2.data - a2.grad.data * 1e-4
            b2.data = b2.data - b2.grad.data * 1e-3
            break  # only process one batch in this demonstration
        print(f"epoch:{epoch}, now a:{a1}, now b:{b1}, now loss: {all_loss / len(dl)}")
    
    # y_pre = a * x + b
    # plt.plot(x.detach().numpy(), y.detach().numpy(), 'go', label='data', alpha=0.3)
    # plt.plot(x.detach().numpy(), y_pre.detach().numpy(),
    #          label='predicted', alpha=1)
    # plt.legend()
    # plt.show()
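
As a quick numeric check of the formulas from step 5, take a single sample x_i=2, y_i=9 and (made-up) parameters a_1=1, b_1=1, a_2=2, b_2=1:

g(x_i) = 1\cdot 2+1 = 3,\quad f(g(x_i)) = 2\cdot 3+1 = 7,\quad f(g(x_i))-y_i = -2\\ \frac{\partial Loss}{\partial a_2}=(f(g(x_i))-y_i)\,g(x_i)=-6,\quad \frac{\partial Loss}{\partial b_2}=f(g(x_i))-y_i=-2\\ \frac{\partial Loss}{\partial a_1}=(f(g(x_i))-y_i)\,a_2x_i=-8,\quad \frac{\partial Loss}{\partial b_1}=(f(g(x_i))-y_i)\,a_2=-4

Running the loop above on such a one-sample batch would report the same four gradients via autograd.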
    

III. Multi-factor gradient descent and backpropagation in a multi-layer network

  1. Code example
# build the dataset: 100 samples with 2 features
x = torch.randn(100, 2)
noise = torch.randn(100)

# ground truth: y = 2*x1 + 1*x2 + 3
y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
y = y + noise

t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)

def dy_lr(epoch_num):
    # simple step learning-rate schedule (defined for experimentation; not called in the code below)
    if epoch_num < 120:
        return 3e-3
    # if 100 <= epoch_num < 1000:
    #     return 1e-5
    else:
        return 1e-3


def cal_grad(line1_output, line2_grad, line1_para):
    """
    Backpropagate the gradient through one linear layer.
    :param line1_output: output of the previous layer, shape (batch_size, number of units in the previous layer)
    :param line2_grad: gradient w.r.t. this layer's output, shape (batch_size, number of units in this layer)
    :param line1_para: this layer's weights, shape (units in previous layer, units in this layer); line1_output @ line1_para = line2
    :return: gradient w.r.t. the previous layer's output, and per-sample gradients w.r.t. the weights
    """
    line1_grad = torch.matmul(line1_para, line2_grad.unsqueeze(2))
    line1_a_grad = torch.matmul(line1_output.unsqueeze(2), line2_grad.unsqueeze(1))
    return line1_grad.squeeze(), line1_a_grad.squeeze()


def test_backward():
    """
    Compute the gradients by hand and compare them with autograd.
    :return:
    """
    line1_a = torch.randn(2, 2, requires_grad=True)
    line1_b = torch.randn(1, 2, requires_grad=True)

    line2_a = torch.randn(2, 2, requires_grad=True)
    line2_b = torch.randn(1, 2, requires_grad=True)

    line3_a = torch.randn(2, 1, requires_grad=True)
    line3_b = torch.randn(1, requires_grad=True)
    for xt, yt in dl:
        line1_out = torch.matmul(xt, line1_a) + line1_b
        line2_out = torch.matmul(line1_out, line2_a) + line2_b
        line3_out = torch.matmul(line2_out, line3_a) + line3_b
        output = line3_out.squeeze()
        loss = torch.sum((output - yt) ** 2)**2 / 2  # note: (sum of squared errors) squared, to match the manual gradient below
        loss.backward()
        print("x:{}, y:{}, y_pred:{}".format(xt, yt, output))
        print("line1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
            line1_a, line1_b, line2_a, line2_b, line3_a, line3_b
        ))
        print("*" * 20)
        print("line1_out:{},\nline2_out:{},\nline3_out:{}".format(
            line1_out, line2_out, line3_out))
        print("*" * 20)
        print("loss:{}".format(loss))
        print("*" * 20)
        print("grad:\nline1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
            line1_a.grad, line1_b.grad, line2_a.grad, line2_b.grad, line3_a.grad, line3_b.grad
        ))
        print("*" * 20)
        # 1. gradient of the loss w.r.t. y_pred
        # grad_loss_y_pre = output.detach() - yt
        grad_loss_y_pre = (torch.sum((output - yt) ** 2)*2*(output - yt)).detach()
        print(grad_loss_y_pre)
        # backpropagate layer by layer
        grad_loss_line3, grad_loss_line3_a = cal_grad(line2_out, grad_loss_y_pre.unsqueeze(1), line3_a)
        grad_loss_line2, grad_loss_line2_a = cal_grad(line1_out, grad_loss_line3, line2_a)
        grad_loss_line1, grad_loss_line1_a = cal_grad(xt, grad_loss_line2, line1_a)
        print(
            f"grad:line1_a:{grad_loss_line1_a.sum(dim=0)},\nline1:{grad_loss_line1.sum(dim=0)}")
        print(
            f"grad: line2_a:{grad_loss_line2_a.sum(dim=0)},\nline2:{grad_loss_line2.sum(dim=0)}")
        print(
            f"grad: line3_a:{grad_loss_line3_a.sum(dim=0)},\n line3:{grad_loss_line3.sum(dim=0)}")
        break
test_backward()
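
To make cal_grad's shape conventions concrete, here is a minimal sketch with made-up sizes (a batch of 5, a previous layer with 3 units feeding a layer with 2 units); it only assumes the torch import and the cal_grad definition above:

prev_out = torch.randn(5, 3)    # output of the previous layer
this_grad = torch.randn(5, 2)   # gradient of the loss w.r.t. this layer's output
weight = torch.randn(3, 2)      # this layer's weights: prev_out @ weight (+ bias) gives this layer's output

prev_grad, weight_grad = cal_grad(prev_out, this_grad, weight)
print(prev_grad.shape)          # torch.Size([5, 3]): per-sample gradient passed back to the previous layer
print(weight_grad.shape)        # torch.Size([5, 3, 2]): per-sample weight gradients; sum over dim 0 before updating
print(this_grad.sum(dim=0))     # the bias gradient of this layer is simply this sum over the batch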
  2. The same thing with PyTorch's built-in modules
# build the dataset
x = torch.randn(100, 2)
noise = torch.randn(100)

# ground truth: y = 2*x1 + 1*x2 + 3
y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
y = y + noise

t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)


from torch import nn
from torch.optim import Adam


def test_by_torch_model():
    """
    Fit the same data with PyTorch's built-in layers, optimizer, and loss.
    :return:
    """
    model = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2), nn.Linear(2, 1))
    optim = Adam(model.parameters(), lr=1e-2)
    criterion = nn.MSELoss()
    for epoch in range(50):
        all_loss = 0
        for xt, yt in dl:
            outputs = model(xt)
            optim.zero_grad()
            loss = criterion(outputs.squeeze(), yt)
            loss.backward()
            optim.step()
            all_loss += loss.detach().data
        print(f"epoch:{epoch},  now loss: {all_loss / len(dl)}")
    y_pred = model(x).squeeze().detach().numpy()
    plt.plot([i for i in range(len(y_pred))],
             y.detach().numpy(), 'go', label='data', alpha=0.3)
    plt.plot([i for i in range(len(y_pred))],
             y_pred, label='predicted', alpha=1)
    plt.legend()
    plt.show()

# test_backward()
test_by_torch_model()
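
Because the three stacked nn.Linear layers have no activation functions between them, the whole model is still one linear map, so the learned parameters can be collapsed and compared with the true coefficients [2, 1] and bias 3. A minimal sketch, assuming test_by_torch_model is modified to return its trained model (or the code above is run at module level so that model is in scope):

with torch.no_grad():
    W1, c1 = model[0].weight, model[0].bias
    W2, c2 = model[1].weight, model[1].bias
    W3, c3 = model[2].weight, model[2].bias
    eff_weight = W3 @ W2 @ W1                  # effective weight, shape (1, 2); should be close to [2, 1]
    eff_bias = (c1 @ W2.T + c2) @ W3.T + c3    # effective bias; should be close to 3
    print(eff_weight, eff_bias)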
