
Demonstrating Gradient Descent and Backpropagation with Single-Factor Linear Regression

Author: 456又 | Published 2019-12-25 19:52

    A backpropagation example

    I. Gradient Descent and Backpropagation in a Single-Layer Network

    Single-factor linear model

    1. Linear model definition

    \hat{y}_i=ax_i+b

    2. Loss function
      Loss = \frac{1}{2}\sum_i^n(\hat{y}_i-y_i)^2=\frac{1}{2}\sum_i^n(ax_i+b-y_i)^2

    3. Partial derivatives of the loss function (plugging the current parameter values into the partial derivatives gives the gradients); a quick autograd check follows the formulas

    \frac{\partial Loss}{\partial a} =\frac{\partial \frac{1}{2}\sum_i^n(ax_i+b-y_i)^2}{\partial a}=\sum_i^n(ax_i+b-y_i)x_i \\ \frac{\partial Loss}{\partial b} =\frac{\partial \frac{1}{2}\sum_i^n(ax_i+b-y_i)^2}{\partial b}=\sum_i^n(ax_i+b-y_i)
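
      To double-check these formulas, here is a minimal sketch (the sample values of x, y, a, and b are made up for illustration) comparing the analytic gradients with what PyTorch's autograd computes:

      import torch

      x = torch.tensor([1.0, 2.0, 3.0])
      y = torch.tensor([12.0, 14.0, 16.0])
      a = torch.tensor(20.0, requires_grad=True)
      b = torch.tensor(30.0, requires_grad=True)

      # loss as defined above
      loss = 0.5 * torch.sum((a * x + b - y) ** 2)
      loss.backward()

      # analytic gradients from the formulas above
      grad_a = torch.sum((a.detach() * x + b.detach() - y) * x)
      grad_b = torch.sum(a.detach() * x + b.detach() - y)
      print(a.grad, grad_a)  # the two values should match
      print(b.grad, grad_b)  # the two values should match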

    4. Update the parameters

      Define a learning rate lr; plugging the gradient in gives the updated value of a (likewise for b); a small worked example follows the formula:
      a=a-lr*\frac{\partial Loss}{\partial a}\\
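
      As a quick worked example (the numbers are made up): for a single data point x_i=1, y_i=12 with current values a=20, b=30 and lr=0.01, the gradient is \frac{\partial Loss}{\partial a}=(ax_i+b-y_i)x_i=(20+30-12)\cdot1=38, so the update gives a=20-0.01\cdot38=19.62.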

    5. Code example

      1. Build the dataset
      import matplotlib.pyplot as plt
      import torch
      from torch.utils.data import TensorDataset, DataLoader
      
      x = torch.arange(1, 100, 2)
      noise = torch.randn(50)
      y = x * 2 + 10
      # y = y + noise
      
      t_data_set = TensorDataset(x, y)
      
      dl = DataLoader(t_data_set, batch_size=5)
      
      a = torch.tensor(20.0, requires_grad=True)
      b = torch.tensor(30.0, requires_grad=True)
      
      2. The epoch loop

        for epoch in range(100):
            all_loss = 0
            for xt, yt in dl:
                # forward pass and loss
                y_pred = a * xt + b
                loss = torch.sum((y_pred - yt) ** 2) / 2
                all_loss += loss.data
                # zero the gradients (they are still None on the first iteration)
                if a.grad is not None:
                    a.grad.data.zero_()
                    b.grad.data.zero_()
                # backpropagation
                loss.backward()
                # update the parameters (different learning rates for a and b)
                a.data = a.data - a.grad.data * 1e-4
                b.data = b.data - b.grad.data * 1e-3
            print(f"epoch:{epoch}, a:{a.data}, b:{b.data}, loss:{all_loss / len(dl)}")
        

    II. Backpropagation in a Multi-Layer Network

    Gradient descent and backpropagation for a single-factor, multi-layer network

    1. Multi-layer linear model definition
      Composite function: \hat{y}_i=a_2(a_1x_i+b_1)+b_2\\ First layer: g(x_i) = a_1x_i+b_1\\ Second layer: f(x_i) = a_2g(x_i)+b_2\\ Note that x_i is treated as a constant here, i.e. g(x_1) and g(x_2) are different functions of the parameters a_1 and b_1 (see the expansion below)
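
      For intuition, expanding the composite (this is just algebra on the definition above) shows that, with no nonlinearity between the layers, the model is still a single straight line in x_i:
      \hat{y}_i=a_2(a_1x_i+b_1)+b_2=a_2a_1x_i+(a_2b_1+b_2)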

    2. Loss function
      Loss = \frac{1}{2}\sum_i^n(\hat{y}_i-y_i)^2=\frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2=\frac{1}{2}\sum_i^n(a_2(a_1x_i+b_1)+b_2-y_i)^2

    3. First, differentiate with respect to a_2 and b_2
      \frac{\partial Loss}{\partial a_2} =\frac{\partial \frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2}{\partial a_2}=\sum_i^n(a_2g(x_i)+b_2-y_i)g(x_i) \\ \frac{\partial Loss}{\partial b_2} =\frac{\partial \frac{1}{2}\sum_i^n(a_2g(x_i)+b_2-y_i)^2}{\partial b_2}=\sum_i^n(a_2g(x_i)+b_2-y_i)

    4. The chain rule (verified numerically in the sketch below)
      \frac{\partial y}{\partial x}=\frac{\partial y}{\partial u}\frac{\partial u}{\partial x}\\ If \mu=\varphi(t) and \nu=\psi(t) are differentiable at t, and z=f(\mu, \nu) has continuous partial derivatives at (\mu, \nu),\\ then the composite function z=f(\varphi(t),\psi(t)) is differentiable at t, and the chain rule gives:\\ \frac{dz}{dt} =\frac{\partial z}{\partial \mu}\frac{\partial \mu}{\partial t}+\frac{\partial z}{\partial \nu}\frac{\partial \nu}{\partial t}
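
      As a small sketch (the functions chosen for phi, psi and f are arbitrary examples), autograd can be used to confirm the chain rule numerically:

      import torch

      t = torch.tensor(2.0, requires_grad=True)
      mu = t ** 2     # mu = phi(t)
      nu = 3 * t      # nu = psi(t)
      z = mu * nu     # z = f(mu, nu) = 3 * t**3
      z.backward()

      # chain rule by hand:
      # dz/dt = (dz/dmu)*(dmu/dt) + (dz/dnu)*(dnu/dt) = nu*2t + mu*3 = 9*t**2 = 36
      print(t.grad)   # tensor(36.)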

    5. Differentiate with respect to a_1 and b_1 via the chain rule
      Loss = \frac{1}{2}\sum_i^n(f(g(x_i))-y_i)^2\\ \frac{\partial Loss}{\partial a_1}=\sum_i^n[(f(g(x_i))-y_i)\frac{\partial f(g(x_i))}{\partial g(x_i)}\frac{\partial g(x_i)}{\partial a_1}]=\sum_i^n(f(g(x_i))-y_i)a_2x_i\\ \frac{\partial Loss}{\partial b_1}=\sum_i^n[(f(g(x_i))-y_i)\frac{\partial f(g(x_i))}{\partial g(x_i)}\frac{\partial g(x_i)}{\partial b_1}]=\sum_i^n(f(g(x_i))-y_i)a_2

    6. Code example

      import seaborn as sns
      import matplotlib.pyplot as plt
      import torch
      from torch.utils.data import TensorDataset, DataLoader
      import pandas as pd
      
      x = torch.arange(1, 100, 2)
      noise = torch.randn(50)
      y = x * 2 + 10
      # y = y + noise
      
      t_data_set = TensorDataset(x, y)
      
      dl = DataLoader(t_data_set, batch_size=5)
      
      # two-layer network parameters
      a1 = torch.tensor(20.0, requires_grad=True)
      b1 = torch.tensor(30.0, requires_grad=True)
      
      a2 = torch.tensor(20.0, requires_grad=True)
      b2 = torch.tensor(30.0, requires_grad=True)
      flag = 0
      for epoch in range(1):
          all_loss = 0
          for xt, yt in dl:
              # forward pass and loss
              liner1 = a1 * xt + b1
              y_pred = a2 * liner1 + b2
              loss = torch.sum((y_pred - yt) ** 2) / 2
              all_loss += loss.data
              # zero the gradients (skipped on the first iteration, when .grad is still None)
              if flag != 0:
                  a1.grad.data.zero_()
                  b1.grad.data.zero_()
                  a2.grad.data.zero_()
                  b2.grad.data.zero_()
              else:
                  flag = 1
              loss.backward()
              print(f"a1:{a1.data}, a2:{a2.data}, b1:{b1.data},b2:{b2.data}")
              print(f"自动计算的梯度:a1_grad:{a1.grad}, a2_grad:{a2.grad}, b1_grad:{b1.grad},b_grad:{b2.grad}")
              print(f"x: {xt}, y: {yt}, 第一层结果:{liner1}, 第二层结果:{y_pred}")
              print(f"计算对a2梯度:{torch.sum(torch.mul(y_pred - yt, liner1))}")
              print(f"loss对liner1的梯度:{a2 * (y_pred - yt)}")
              print(f"liner1对a1的梯度:{xt}")
              print(f"loss对a1的梯度:{torch.sum(torch.mul(xt, a2 * (y_pred - yt)))}")
              print(f"loss对b1的梯度:{torch.sum(a2 * (y_pred - yt))}")
      
              # update the parameters
              a1.data = a1.data - a1.grad.data * 1e-4
              b1.data = b1.data - b1.grad.data * 1e-3
              a2.data = a2.data - a2.grad.data * 1e-4
              b2.data = b2.data - b2.grad.data * 1e-3
              break  # only process one batch, for demonstration
          print(f"epoch:{epoch}, now a:{a1}, now b:{b1}, now loss: {all_loss / len(dl)}")
      
      # y_pre = a * x + b
      # plt.plot(x.detach().numpy(), y.detach().numpy(), 'go', label='data', alpha=0.3)
      # plt.plot(x.detach().numpy(), y_pre.detach().numpy(),
      #          label='predicted', alpha=1)
      # plt.legend()
      # plt.show()
      

    III. Gradient Descent and Backpropagation for a Multi-Layer, Multi-Factor Network

    1. Code example
    # build the dataset
    x = torch.randn(100, 2)
    noise = torch.randn(100)
    
    # y = 2*x1 + 1*x2 + 3
    y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
    y = y + noise
    
    t_data_set = TensorDataset(x, y)
    dl = DataLoader(t_data_set, batch_size=5)
    
    # simple learning-rate schedule helper (defined here but not called in the code shown below)
    def dy_lr(epoch_num):
        if epoch_num < 120:
            return 3e-3
        # if 100 <= epoch_num < 1000:
        #     return 1e-5
        else:
            return 1e-3
    
    
    def cal_grad(line1_output, line2_grad, line1_para):
        """
        求导
        :param line1_output: 上一层的输出 batch_size * 上一层神经元树
        :param line2_grad: 本层的导数 batch_size * 本层 神经元树
        :param line1_para: 本层的参数,即上层神经元数*本层神经元数 line1_output*line1_para = line2
        :return: 本层的导数和参数的导数
        """
        line1_grad = torch.matmul(line1_para, line2_grad.unsqueeze(2))
        line1_a_grad = torch.matmul(line1_output.unsqueeze(2), line2_grad.unsqueeze(1))
        return line1_grad.squeeze(), line1_a_grad.squeeze()
    
    
    def test_backward():
        """
        Manually compute the gradients layer by layer and compare them with autograd.
        :return:
        """
        line1_a = torch.randn(2, 2, requires_grad=True)
        line1_b = torch.randn(1, 2, requires_grad=True)
    
        line2_a = torch.randn(2, 2, requires_grad=True)
        line2_b = torch.randn(1, 2, requires_grad=True)
    
        line3_a = torch.randn(2, 1, requires_grad=True)
        line3_b = torch.randn(1, requires_grad=True)
        for xt, yt in dl:
            line1_out = torch.matmul(xt, line1_a) + line1_b
            line2_out = torch.matmul(line1_out, line2_a) + line2_b
            line3_out = torch.matmul(line2_out, line3_a) + line3_b
            output = line3_out.squeeze()
            # note: the loss here is (sum of squared errors) squared, divided by 2
            loss = torch.sum((output - yt) ** 2) ** 2 / 2
            loss.backward()
            print("x:{}, y:{}, y_pred:{}".format(xt, yt, output))
            print("line1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
                line1_a, line1_b, line2_a, line2_b, line3_a, line3_b
            ))
            print("*" * 20)
            print("line1_out:{},\nline2_out:{},\nline3_out:{}".format(
                line1_out, line2_out, line3_out))
            print("*" * 20)
            print("loss:{}".format(loss))
            print("*" * 20)
            print("grad:\nline1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
                line1_a.grad, line1_b.grad, line2_a.grad, line2_b.grad, line3_a.grad, line3_b.grad
            ))
            print("*" * 20)
            # 1. gradient of Loss w.r.t. y_pred
            # grad_loss_y_pre = output.detach() - yt
            grad_loss_y_pre = (torch.sum((output - yt) ** 2)*2*(output - yt)).detach()
            print(grad_loss_y_pre)
            # back-propagate layer by layer
            grad_loss_line3, grad_loss_line3_a = cal_grad(line2_out, grad_loss_y_pre.unsqueeze(1), line3_a)
            grad_loss_line2, grad_loss_line2_a = cal_grad(line1_out, grad_loss_line3, line2_a)
            grad_loss_line1, grad_loss_line1_a = cal_grad(xt, grad_loss_line2, line1_a)
            print(
                f"grad:line1_a:{grad_loss_line1_a.sum(dim=0)},\nline1:{grad_loss_line1.sum(dim=0)}")
            print(
                f"grad: line2_a:{grad_loss_line2_a.sum(dim=0)},\nline2:{grad_loss_line2.sum(dim=0)}")
            print(
                f"grad: line3_a:{grad_loss_line3_a.sum(dim=0)},\n line3:{grad_loss_line3.sum(dim=0)}")
            break
    test_backward()
    
    2. Simplified version using PyTorch's built-in modules
    from torch import nn
    from torch.optim import Adam

    # build the dataset
    x = torch.randn(100, 2)
    noise = torch.randn(100)
    
    # y = 2*x1 + 1*x2 + 3
    y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
    y = y + noise
    
    t_data_set = TensorDataset(x, y)
    dl = DataLoader(t_data_set, batch_size=5)
    
    
    def test_by_torch_model():
        """
        Train the same model using PyTorch's built-in nn.Linear layers and the Adam optimizer.
        :return:
        """
        model = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2), nn.Linear(2, 1))
        optim = Adam(model.parameters(), lr=1e-2)
        criterion = nn.MSELoss()
        for epoch in range(50):
            all_loss = 0
            for xt, yt in dl:
                outputs = model(xt)
                optim.zero_grad()
                loss = criterion(yt, outputs.squeeze())
                loss.backward()
                optim.step()
                all_loss += loss.detach().data
            print(f"epoch:{epoch},  now loss: {all_loss / len(dl)}")
        y_pred = model(x).squeeze().detach().numpy()
        plt.plot([i for i in range(len(y_pred))],
                 y.detach().numpy(), 'go', label='data', alpha=0.3)
        plt.plot([i for i in range(len(y_pred))],
                 y_pred, label='predicted', alpha=1)
        plt.legend()
        plt.show()
    
    # test_backward()
    test_by_torch_model()
    
