Fully Connected Neural Networks

Author: 0xFFFFFG | Published 2019-07-02 15:01
    import sys, os
    sys.path.append(os.pardir)  # Setting to allow importing files from the parent directory
    import numpy as np
    from collections import OrderedDict
    from common.layers import *
    from common.gradient import numerical_gradient
    
    
    class MultiLayerNet:
        """全连接的多层神经网络
    
        Parameters
        ----------
        input_size : 输入大小(MNIST的情况下为784)
        hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100])
        output_size : 输出大小(MNIST的情况下为10)
        activation : 'relu' or 'sigmoid'
        weight_init_std : 指定权重的标准差(e.g. 0.01)
            指定'relu'或'he'的情况下设定“He的初始值”
            指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
        weight_decay_lambda : Weight Decay(L2范数)的强度
        """
        def __init__(self, input_size, hidden_size_list, output_size,
                     activation='relu', weight_init_std='relu', weight_decay_lambda=0):
            self.input_size = input_size
            self.output_size = output_size
            self.hidden_size_list = hidden_size_list
            self.hidden_layer_num = len(hidden_size_list)
            self.weight_decay_lambda = weight_decay_lambda
            self.params = {}
    
            # Initialize the weights
            self.__init_weight(weight_init_std)
    
            # Create the layers
            activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
            self.layers = OrderedDict()
            for idx in range(1, self.hidden_layer_num+1):
                self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                          self.params['b' + str(idx)])
                self.layers['Activation_function' + str(idx)] = activation_layer[activation]()
    
            idx = self.hidden_layer_num + 1
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                self.params['b' + str(idx)])
    
            self.last_layer = SoftmaxWithLoss()
    
        def __init_weight(self, weight_init_std):
            """设定权重的初始值
    
            Parameters
            ----------
            weight_init_std : 指定权重的标准差(e.g. 0.01)
                指定'relu'或'he'的情况下设定“He的初始值”
                指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
            """
            all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
            for idx in range(1, len(all_size_list)):
                scale = weight_init_std
                if str(weight_init_std).lower() in ('relu', 'he'):
                    scale = np.sqrt(2.0 / all_size_list[idx - 1])  # Recommended initial scale when using ReLU
                elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                    scale = np.sqrt(1.0 / all_size_list[idx - 1])  # Recommended initial scale when using sigmoid
    
                self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx])
                self.params['b' + str(idx)] = np.zeros(all_size_list[idx])
    
        def predict(self, x):
            for layer in self.layers.values():
                x = layer.forward(x)
    
            return x
    
        def loss(self, x, t):
            """求损失函数
    
            Parameters
            ----------
            x : 输入数据
            t : 教师标签
    
            Returns
            -------
            损失函数的值
            """
            y = self.predict(x)
    
            weight_decay = 0
            for idx in range(1, self.hidden_layer_num + 2):
                W = self.params['W' + str(idx)]
                weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W ** 2)
    
            return self.last_layer.forward(y, t) + weight_decay
    
        def accuracy(self, x, t):
            y = self.predict(x)
            y = np.argmax(y, axis=1)
            if t.ndim != 1 : t = np.argmax(t, axis=1)
    
            accuracy = np.sum(y == t) / float(x.shape[0])
            return accuracy
    
        def numerical_gradient(self, x, t):
            """求梯度(数值微分)
    
            Parameters
            ----------
            x : 输入数据
            t : 教师标签
    
            Returns
            -------
            具有各层的梯度的字典变量
                grads['W1']、grads['W2']、...是各层的权重
                grads['b1']、grads['b2']、...是各层的偏置
            """
            loss_W = lambda W: self.loss(x, t)
    
            grads = {}
            for idx in range(1, self.hidden_layer_num+2):
                grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
                grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
    
            return grads
    
        def gradient(self, x, t):
            """求梯度(误差反向传播法)
    
            Parameters
            ----------
            x : 输入数据
            t : 教师标签
    
            Returns
            -------
            具有各层的梯度的字典变量
                grads['W1']、grads['W2']、...是各层的权重
                grads['b1']、grads['b2']、...是各层的偏置
            """
            # forward
            self.loss(x, t)
    
            # backward
            dout = 1
            dout = self.last_layer.backward(dout)
    
            layers = list(self.layers.values())
            layers.reverse()
            for layer in layers:
                dout = layer.backward(dout)
    
            # Collect the gradients
            grads = {}
            for idx in range(1, self.hidden_layer_num+2):
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W
                grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db
    
            return grads
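
A minimal usage sketch for the MultiLayerNet class above (not part of the original article): it assumes the book's common package is importable and uses random dummy data in place of MNIST, with illustrative layer sizes and hyperparameters.

    import numpy as np

    # Dummy stand-in for an MNIST mini-batch: 20 samples, 784 features, 10 classes
    x = np.random.rand(20, 784)
    t = np.zeros((20, 10))
    t[np.arange(20), np.random.randint(0, 10, 20)] = 1  # one-hot teacher labels

    # Two hidden layers of 100 units each, with a little weight decay
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100],
                            output_size=10, weight_decay_lambda=0.01)

    # One step of plain SGD using the backpropagation gradients
    learning_rate = 0.1
    grads = network.gradient(x, t)
    for key in network.params.keys():
        network.params[key] -= learning_rate * grads[key]

    print(network.loss(x, t), network.accuracy(x, t))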
    
    import sys, os
    sys.path.append(os.pardir) # Setting to allow importing files from the parent directory
    import numpy as np
    from collections import OrderedDict
    from common.layers import *
    from common.gradient import numerical_gradient
    
    class MultiLayerNetExtend:
        """扩展版的全连接的多层神经网络
        
        具有Weiht Decay、Dropout、Batch Normalization的功能
    
        Parameters
        ----------
        input_size : 输入大小(MNIST的情况下为784)
        hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100])
        output_size : 输出大小(MNIST的情况下为10)
        activation : 'relu' or 'sigmoid'
        weight_init_std : 指定权重的标准差(e.g. 0.01)
            指定'relu'或'he'的情况下设定“He的初始值”
            指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
        weight_decay_lambda : Weight Decay(L2范数)的强度
        use_dropout: 是否使用Dropout
        dropout_ration : Dropout的比例
        use_batchNorm: 是否使用Batch Normalization
        """
        def __init__(self, input_size, hidden_size_list, output_size,
                     activation='relu', weight_init_std='relu', weight_decay_lambda=0, 
                     use_dropout = False, dropout_ration = 0.5, use_batchnorm=False):
            self.input_size = input_size
            self.output_size = output_size
            self.hidden_size_list = hidden_size_list
            self.hidden_layer_num = len(hidden_size_list)
            self.use_dropout = use_dropout
            self.weight_decay_lambda = weight_decay_lambda
            self.use_batchnorm = use_batchnorm
            self.params = {}
    
            # Initialize the weights
            self.__init_weight(weight_init_std)
    
            # Create the layers
            activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
            self.layers = OrderedDict()
            for idx in range(1, self.hidden_layer_num+1):
                self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                          self.params['b' + str(idx)])
                if self.use_batchnorm:
                    self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx-1])
                    self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx-1])
                    self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
                    
                self.layers['Activation_function' + str(idx)] = activation_layer[activation]()
                
                if self.use_dropout:
                    self.layers['Dropout' + str(idx)] = Dropout(dropout_ration)
    
            idx = self.hidden_layer_num + 1
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
    
            self.last_layer = SoftmaxWithLoss()
    
        def __init_weight(self, weight_init_std):
            """设定权重的初始值
    
            Parameters
            ----------
            weight_init_std : 指定权重的标准差(e.g. 0.01)
                指定'relu'或'he'的情况下设定“He的初始值”
                指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
            """
            all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
            for idx in range(1, len(all_size_list)):
                scale = weight_init_std
                if str(weight_init_std).lower() in ('relu', 'he'):
                    scale = np.sqrt(2.0 / all_size_list[idx - 1])  # Recommended initial scale when using ReLU
                elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                    scale = np.sqrt(1.0 / all_size_list[idx - 1])  # Recommended initial scale when using sigmoid
                self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx])
                self.params['b' + str(idx)] = np.zeros(all_size_list[idx])
    
        def predict(self, x, train_flg=False):
            for key, layer in self.layers.items():
                if "Dropout" in key or "BatchNorm" in key:
                    x = layer.forward(x, train_flg)
                else:
                    x = layer.forward(x)
    
            return x
    
        def loss(self, x, t, train_flg=False):
            """求损失函数
            参数x是输入数据,t是教师标签
            """
            y = self.predict(x, train_flg)
    
            weight_decay = 0
            for idx in range(1, self.hidden_layer_num + 2):
                W = self.params['W' + str(idx)]
                weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
    
            return self.last_layer.forward(y, t) + weight_decay
    
        def accuracy(self, X, T):
            Y = self.predict(X, train_flg=False)
            Y = np.argmax(Y, axis=1)
            if T.ndim != 1 : T = np.argmax(T, axis=1)
    
            accuracy = np.sum(Y == T) / float(X.shape[0])
            return accuracy
    
        def numerical_gradient(self, X, T):
            """求梯度(数值微分)
    
            Parameters
            ----------
            X : 输入数据
            T : 教师标签
    
            Returns
            -------
            具有各层的梯度的字典变量
                grads['W1']、grads['W2']、...是各层的权重
                grads['b1']、grads['b2']、...是各层的偏置
            """
            loss_W = lambda W: self.loss(X, T, train_flg=True)
    
            grads = {}
            for idx in range(1, self.hidden_layer_num+2):
                grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
                grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
                
                if self.use_batchnorm and idx != self.hidden_layer_num+1:
                    grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)])
                    grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)])
    
            return grads
            
        def gradient(self, x, t):
            # forward
            self.loss(x, t, train_flg=True)
    
            # backward
            dout = 1
            dout = self.last_layer.backward(dout)
    
            layers = list(self.layers.values())
            layers.reverse()
            for layer in layers:
                dout = layer.backward(dout)
    
            # Collect the gradients
            grads = {}
            for idx in range(1, self.hidden_layer_num+2):
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
                grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db
    
                if self.use_batchnorm and idx != self.hidden_layer_num+1:
                    grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                    grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta
    
            return grads
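
A similar sketch for the extended network (again with illustrative dummy data, not from the original article). The constructor flags switch Dropout and Batch Normalization on, and predict/loss take a train_flg argument so those layers behave differently during training and inference.

    import numpy as np

    x = np.random.rand(20, 784)
    t = np.random.randint(0, 10, 20)  # class-index labels also work (see cross_entropy_error below)

    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100],
                                  output_size=10, use_dropout=True, dropout_ration=0.2,
                                  use_batchnorm=True)

    grads = network.gradient(x, t)     # backprop; the forward pass runs with train_flg=True
    train_loss = network.loss(x, t, train_flg=True)
    test_acc = network.accuracy(x, t)  # accuracy always evaluates with train_flg=False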
    

common/layers.py:

    from common.functions import *
    from common.util import im2col, col2im
    
    
    class Relu:
        def __init__(self):
            self.mask = None
    
        def forward(self, x):
            self.mask = (x <= 0)
            out = x.copy()
            out[self.mask] = 0
    
            return out
    
        def backward(self, dout):
            dout[self.mask] = 0
            dx = dout
    
            return dx
    
    
    class Sigmoid:
        def __init__(self):
            self.out = None
    
        def forward(self, x):
            out = sigmoid(x)
            self.out = out
            return out
    
        def backward(self, dout):
            dx = dout * (1.0 - self.out) * self.out
    
            return dx
    
    
    class Affine:
        def __init__(self, W, b):
            self.W = W
            self.b = b
            
            self.x = None
            self.original_x_shape = None
            # Derivatives of the weight and bias parameters
            self.dW = None
            self.db = None
    
        def forward(self, x):
            # Support tensor inputs (flatten to 2D)
            self.original_x_shape = x.shape
            x = x.reshape(x.shape[0], -1)
            self.x = x
    
            out = np.dot(self.x, self.W) + self.b
    
            return out
    
        def backward(self, dout):
            dx = np.dot(dout, self.W.T)
            self.dW = np.dot(self.x.T, dout)
            self.db = np.sum(dout, axis=0)
            
            dx = dx.reshape(*self.original_x_shape)  # Restore the shape of the input data (for tensors)
            return dx
    
    
    class SoftmaxWithLoss:
        def __init__(self):
            self.loss = None
            self.y = None # output of softmax
            self.t = None # teacher labels
    
        def forward(self, x, t):
            self.t = t
            self.y = softmax(x)
            self.loss = cross_entropy_error(self.y, self.t)
            
            return self.loss
    
        def backward(self, dout=1):
            batch_size = self.t.shape[0]
            if self.t.size == self.y.size: # when the teacher labels are one-hot vectors
                dx = (self.y - self.t) / batch_size
            else:
                dx = self.y.copy()
                dx[np.arange(batch_size), self.t] -= 1
                dx = dx / batch_size
            
            return dx
    
    
    class Dropout:
        """
        http://arxiv.org/abs/1207.0580
        """
        def __init__(self, dropout_ratio=0.5):
            self.dropout_ratio = dropout_ratio
            self.mask = None
    
        def forward(self, x, train_flg=True):
            if train_flg:
                self.mask = np.random.rand(*x.shape) > self.dropout_ratio
                return x * self.mask
            else:
                return x * (1.0 - self.dropout_ratio)
    
        def backward(self, dout):
            return dout * self.mask
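
    # Illustrative note (not from the original article): in training mode, forward zeroes a
    # random subset of activations via the mask and backward passes gradients only through
    # the surviving units; at test time the activations are scaled by (1 - dropout_ratio).
    #
    #     drop = Dropout(dropout_ratio=0.5)
    #     x = np.ones((2, 4))
    #     drop.forward(x, train_flg=True)    # some entries zeroed at random
    #     drop.forward(x, train_flg=False)   # every entry scaled to 0.5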
    
    
    class BatchNormalization:
        """
        http://arxiv.org/abs/1502.03167
        """
        def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
            self.gamma = gamma
            self.beta = beta
            self.momentum = momentum
            self.input_shape = None # 4D for convolutional layers, 2D for fully connected layers
    
            # Mean and variance used at test time
            self.running_mean = running_mean
            self.running_var = running_var  
            
            # Intermediate data used in the backward pass
            self.batch_size = None
            self.xc = None
            self.std = None
            self.dgamma = None
            self.dbeta = None
    
        def forward(self, x, train_flg=True):
            self.input_shape = x.shape
            if x.ndim != 2:
                N, C, H, W = x.shape
                x = x.reshape(N, -1)
    
            out = self.__forward(x, train_flg)
            
            return out.reshape(*self.input_shape)
                
        def __forward(self, x, train_flg):
            if self.running_mean is None:
                N, D = x.shape
                self.running_mean = np.zeros(D)
                self.running_var = np.zeros(D)
                            
            if train_flg:
                mu = x.mean(axis=0)
                xc = x - mu
                var = np.mean(xc**2, axis=0)
                std = np.sqrt(var + 10e-7)
                xn = xc / std
                
                self.batch_size = x.shape[0]
                self.xc = xc
                self.xn = xn
                self.std = std
                self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
                self.running_var = self.momentum * self.running_var + (1-self.momentum) * var            
            else:
                xc = x - self.running_mean
                xn = xc / ((np.sqrt(self.running_var + 10e-7)))
                
            out = self.gamma * xn + self.beta 
            return out
    
        def backward(self, dout):
            if dout.ndim != 2:
                N, C, H, W = dout.shape
                dout = dout.reshape(N, -1)
    
            dx = self.__backward(dout)
    
            dx = dx.reshape(*self.input_shape)
            return dx
    
        def __backward(self, dout):
            dbeta = dout.sum(axis=0)
            dgamma = np.sum(self.xn * dout, axis=0)
            dxn = self.gamma * dout
            dxc = dxn / self.std
            dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
            dvar = 0.5 * dstd / self.std
            dxc += (2.0 / self.batch_size) * self.xc * dvar
            dmu = np.sum(dxc, axis=0)
            dx = dxc - dmu / self.batch_size
            
            self.dgamma = dgamma
            self.dbeta = dbeta
            
            return dx
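
    # Illustrative note (not from the original article): in training mode __forward normalizes
    # each feature column to zero mean and unit variance using the batch statistics and keeps
    # exponentially averaged running_mean / running_var, which replace the batch statistics
    # at test time.
    #
    #     bn = BatchNormalization(gamma=np.ones(3), beta=np.zeros(3))
    #     x = np.random.randn(8, 3) * 5 + 2
    #     out = bn.forward(x, train_flg=True)   # out.mean(axis=0) ~ 0, out.std(axis=0) ~ 1
    #     bn.forward(x, train_flg=False)        # uses running_mean / running_var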
    
    
    class Convolution:
        def __init__(self, W, b, stride=1, pad=0):
            self.W = W
            self.b = b
            self.stride = stride
            self.pad = pad
            
            # Intermediate data (used in the backward pass)
            self.x = None   
            self.col = None
            self.col_W = None
            
            # Gradients of the weight and bias parameters
            self.dW = None
            self.db = None
    
        def forward(self, x):
            FN, C, FH, FW = self.W.shape
            N, C, H, W = x.shape
            out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
            out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
    
            col = im2col(x, FH, FW, self.stride, self.pad)
            col_W = self.W.reshape(FN, -1).T
    
            out = np.dot(col, col_W) + self.b
            out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
    
            self.x = x
            self.col = col
            self.col_W = col_W
    
            return out
    
        def backward(self, dout):
            FN, C, FH, FW = self.W.shape
            dout = dout.transpose(0,2,3,1).reshape(-1, FN)
    
            self.db = np.sum(dout, axis=0)
            self.dW = np.dot(self.col.T, dout)
            self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
    
            dcol = np.dot(dout, self.col_W.T)
            dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
    
            return dx
    
    
    class Pooling:
        def __init__(self, pool_h, pool_w, stride=1, pad=0):
            self.pool_h = pool_h
            self.pool_w = pool_w
            self.stride = stride
            self.pad = pad
            
            self.x = None
            self.arg_max = None
    
        def forward(self, x):
            N, C, H, W = x.shape
            out_h = int(1 + (H - self.pool_h) / self.stride)
            out_w = int(1 + (W - self.pool_w) / self.stride)
    
            col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
            col = col.reshape(-1, self.pool_h*self.pool_w)
    
            arg_max = np.argmax(col, axis=1)
            out = np.max(col, axis=1)
            out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
    
            self.x = x
            self.arg_max = arg_max
    
            return out
    
        def backward(self, dout):
            dout = dout.transpose(0, 2, 3, 1)
            
            pool_size = self.pool_h * self.pool_w
            dmax = np.zeros((dout.size, pool_size))
            dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
            dmax = dmax.reshape(dout.shape + (pool_size,)) 
            
            dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
            dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
            
            return dx
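
The backward passes above can be sanity-checked against numerical differentiation. A minimal gradient-check sketch, assuming the MultiLayerNetExtend class defined earlier (the network size, batch size, and use of Batch Normalization are arbitrary illustrative choices):

    import numpy as np

    network = MultiLayerNetExtend(input_size=10, hidden_size_list=[5], output_size=3,
                                  use_batchnorm=True)
    x = np.random.rand(4, 10)
    t = np.random.randint(0, 3, 4)

    grad_backprop = network.gradient(x, t)
    grad_numerical = network.numerical_gradient(x, t)

    # The two estimates should agree to within numerical-differentiation error
    for key in grad_backprop.keys():
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ": " + str(diff))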
    

common/functions.py:

    import numpy as np
    
    
    def identity_function(x):
        return x
    
    
    def step_function(x):
        return np.array(x > 0, dtype=int)  # np.int is removed in recent NumPy; use the builtin int
    
    
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))    
    
    
    def sigmoid_grad(x):
        return (1.0 - sigmoid(x)) * sigmoid(x)
        
    
    def relu(x):
        return np.maximum(0, x)
    
    
    def relu_grad(x):
        grad = np.zeros_like(x)  # np.zeros(x) would misinterpret x as a shape; zeros_like matches x
        grad[x>=0] = 1
        return grad
        
    
    def softmax(x):
        if x.ndim == 2:
            x = x.T
            x = x - np.max(x, axis=0)
            y = np.exp(x) / np.sum(np.exp(x), axis=0)
            return y.T 
    
        x = x - np.max(x) # guard against overflow
        return np.exp(x) / np.sum(np.exp(x))
    
    
    def mean_squared_error(y, t):
        return 0.5 * np.sum((y-t)**2)
    
    
    def cross_entropy_error(y, t):
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)
            
        # If the teacher labels are one-hot vectors, convert them to the indices of the correct classes
        if t.size == y.size:
            t = t.argmax(axis=1)
                 
        batch_size = y.shape[0]
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
    
    
    def softmax_loss(X, t):
        y = softmax(X)
        return cross_entropy_error(y, t)
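
A small worked example of the softmax / cross-entropy pair above (the numbers are arbitrary), showing that cross_entropy_error accepts teacher labels either as one-hot vectors or as class indices:

    import numpy as np

    scores = np.array([[0.3, 2.9, 4.0],
                       [0.1, 0.2, 0.7]])
    probs = softmax(scores)          # each row sums to 1

    t_onehot = np.array([[0, 0, 1],
                         [0, 1, 0]])
    t_index = np.array([2, 1])       # the same labels as class indices

    # Both label formats give the same loss value
    print(cross_entropy_error(probs, t_onehot))
    print(cross_entropy_error(probs, t_index))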
    
