美文网首页
BP神经网络公式推导

BP神经网络公式推导

作者: 雪地团子 | 来源:发表于2018-10-20 13:27 被阅读0次
    BP神经网络.PNG

    正向传播

    第1层
    公式1
    \begin{bmatrix} W_{n_1,n_0}^1&B_{n_1}^1\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_0}^0\\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_1}^1\\ 1 \end{bmatrix}
    公式2
    \begin{bmatrix} {w_{1,1}^1}&{w_{1,2}^1}&{\cdots}&{w_{1,n_0}^1}&{b_1^1}\\ {w_{2,1}^1}&{w_{2,2}^1}&{\cdots}&{w_{2,n_0}^1}&{b_2^1}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_1,1}^1}&{w_{n_1,2}^1}&{\cdots}&{w_{n_1,n_0}^1}&{b_{n_1}^1}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^0}\\{h_2^0}\\{\vdots}\\{h_{n_0}^0}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^{1}}\\{i_{2}^{1}}\\{\vdots}\\{i_{n_{1}}^{1}}\\1\\ \end{bmatrix}
    公式3(S 为逐元素作用的 sigmoid 激活函数,S(x) = \frac{1}{1+e^{-x}},其导数为 S'(x) = S(x)(1-S(x)))
    S(I_{n_1}^1) = H_{n_1}^1
    公式4
    S( \begin{bmatrix} {i_{1}^{1}}\\{i_{2}^{1}}\\{\vdots}\\{i_{n_{1}}^{1}}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^1}\\{h_2^1}\\{\vdots}\\{h_{n_1}^1}\\ \end{bmatrix}
    第2层
    公式5
    \begin{bmatrix} W_{n_2,n_1}^2&B_{n_2}^2\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_1}^1\\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_2}^2\\ 1 \end{bmatrix}
    公式6
    \begin{bmatrix} {w_{1,1}^2}&{w_{1,2}^2}&{\cdots}&{w_{1,n_1}^2}&{b_1^2}\\ {w_{2,1}^2}&{w_{2,2}^2}&{\cdots}&{w_{2,n_1}^2}&{b_2^2}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_2,1}^2}&{w_{n_2,2}^2}&{\cdots}&{w_{n_2,n_1}^2}&{b_{n_2}^2}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^1}\\{h_2^1}\\{\vdots}\\{h_{n_1}^1}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^2}\\{i_2^2}\\{\vdots}\\{i_{n_2}^2}\\1\\ \end{bmatrix}
    公式7
    S(I_{n_2}^2) = H_{n_2}^2
    公式8
    S( \begin{bmatrix} {i_{1}^2}\\{i_{2}^2}\\{\vdots}\\{i_{n_2}^2}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^2}\\{h_2^2}\\{\vdots}\\{h_{n_2}^2}\\ \end{bmatrix}
    第m层
    公式9
    \begin{bmatrix} W_{n_m,n_{m-1}}^m&B_{n_m}^m\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_{m-1}}^{m-1} \\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_m}^m\\ 1 \end{bmatrix}
    公式10
    \begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}&{b_1^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}&{b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}&{b_{n_m}^m}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}\\{h_2^{m-1}}\\{\vdots}\\{h_{n_{m-1}}^{m-1}}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^m}\\{i_2^m}\\{\vdots}\\{i_{n_m}^m}\\1\\ \end{bmatrix}
    公式11
    S(I_{n_m}^m) = H_{n_m}^m
    公式12
    S( \begin{bmatrix} {i_{1}^m}\\{i_{2}^m}\\{\vdots}\\{i_{n_m}^m}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^m}\\{h_2^m}\\{\vdots}\\{h_{n_m}^m}\\ \end{bmatrix}

    反向传播

    期望输出

    公式13
    Y_{n_m} = \begin{bmatrix} {y_1}\\{y_2}\\{\vdots}\\{y_{n_m}}\\ \end{bmatrix}

    误差

    公式14
    E_{total} = {\frac{1}{2}(h_1^m-y_1)^2}+{\frac{1}{2}(h_2^m-y_2)^2}+{\dots}+{\frac{1}{2}(h_{n_m}^m-y_{n_m})^2}
    公式15
    \frac{\partial E_{total}}{\partial H_{n_m}^m} = \begin{bmatrix} {h_1^m-y_1}\\{h_2^m-y_2}\\{\vdots}\\{h_{n_m}^m-y_{n_m}}\\ \end{bmatrix}
    公式16
    \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} = \begin{bmatrix} {\frac{\partial h_1^m}{\partial i_1^m}}&0&0&0\\ 0&\frac{\partial h_2^m}{\partial i_2^m}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_m}^m}{\partial i_{n_m}^m}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^m(1-h_1^m)}&0&0&0\\ 0&{h_2^m(1-h_2^m)}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_m}^m(1-h_{n_m}^m)}\\ \end{bmatrix}

    权重偏导

    公式17
    \begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_{n_m}^m}\\ \end{bmatrix}
    公式18
    \begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial b_{n_m}^m}\\ \end{bmatrix}
    公式19
    \begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}1\\ \frac{\partial E_{total}}{\partial i_2^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}1\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}1\\ \end{bmatrix}
    公式20
    \begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\\ {\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}&{h_2^{m-1}}&{\dots}&{h_{n_{m-1}}^{m-1}}&1\\ \end{bmatrix}
    公式21
    \begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \frac{\partial E_{total}}{\partial I_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix} = \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix}
    公式22
    \frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} = \begin{bmatrix} {\frac{\partial i_1^m}{\partial h_1^{m-1}}}&\frac{\partial i_2^m}{\partial h_1^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_1^{m-1}}}\\ {\frac{\partial i_1^m}{\partial h_2^{m-1}}}&\frac{\partial i_2^m}{\partial h_2^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_2^{m-1}}}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {\frac{\partial i_1^m}{\partial h_{n_{m-1}}^{m-1}}}&\frac{\partial i_2^m}{\partial h_{n_{m-1}}^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_{n_{m-1}}^{m-1}}}\\ \end{bmatrix}_{n_{m-1},n_m} = \begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}\\ \end{bmatrix}^T =[W_{n_m,n_{m-1}}^m]^T
    公式23
    \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} = \frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m}


    公式24
    \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} = \begin{bmatrix} {h_1^{m-1}(1-h_1^{m-1})}&0&0&0\\ 0&{h_2^{m-1}(1-h_2^{m-1})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-1}}^{m-1}(1-h_{n_{m-1}}^{m-1})}\\ \end{bmatrix}
    公式25
    \begin{bmatrix} \Delta W_{n_{m-1},n_{m-2}}^{m-1}&\Delta B_{n_{m-1}}^{m-1} \end{bmatrix} = \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} \begin{bmatrix} [H_{n_{m-2}}^{m-2}]^T&1\\ \end{bmatrix}
    公式26
    \frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} =[W_{n_{m-1},n_{m-2}}^{m-1}]^T
    公式27
    \frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} = \frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}}


    公式28
    \frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} = \begin{bmatrix} {\frac{\partial h_1^{m-2}}{\partial i_1^{m-2}}}&0&0&0\\ 0&\frac{\partial h_2^{m-2}}{\partial i_2^{m-2}}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_{m-2}}^{m-2}}{\partial i_{n_{m-2}}^{m-2}}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^{m-2}(1-h_1^{m-2})}&0&0&0\\ 0&{h_2^{m-2}(1-h_2^{m-2})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-2}}^{m-2}(1-h_{n_{m-2}}^{m-2})}\\ \end{bmatrix}
    公式29
    \begin{bmatrix} \Delta W_{n_{m-2},n_{m-3}}^{m-2}&\Delta B_{n_{m-2}}^{m-2} \end{bmatrix} = \frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} \frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} \begin{bmatrix} [H_{n_{m-3}}^{m-3}]^T&1\\ \end{bmatrix}


    相关文章

      网友评论

          本文标题:BP神经网络公式推导

          本文链接:https://www.haomeiwen.com/subject/nfbjzftx.html