
Deep Convolutional Generative Adversarial Networks (DCGAN)

Author: 风车哟_ | Published 2018-12-01 20:21

    DCGAN

    DCGAN is not a major departure from the original GAN: it mainly brings a fully convolutional architecture into the GAN framework, so most of GAN's known problems remain. Perhaps because GANs are notoriously hard to train, the paper documents its hyperparameters and implementation details very thoroughly. On the implementation side, the CIFAR-10 results after 25 epochs were too poor to show, so the MNIST results after 40 epochs are shown instead. The input images are likewise resized to 64x64, and the network structure follows the paper. The results are still not entirely satisfactory; some details probably still need improvement.

    In terms of architecture, DCGAN makes the following changes:

    • Remove the pooling layers: G upsamples with transposed (fractionally strided) convolutions, and D replaces pooling with strided convolutions (see the shape-check sketch after this list)

    • Use batch normalization in both G and D

    • Remove the fully connected (FC) layers, making the network fully convolutional

    • Use ReLU in G (tanh for the last layer)

    • Use LeakyReLU in D
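
    As a quick illustration of the first point (my own sketch, not from the paper): a 4x4 convolution with stride 2 and padding 1 halves the spatial size exactly like 2x2 pooling, but with learnable weights, while the matching transposed convolution doubles it:

    import torch
    import torch.nn as nn

    x = torch.randn(1, 64, 32, 32)              # a 64-channel 32x32 feature map
    pool = nn.MaxPool2d(2)                      # fixed 2x downsampling
    conv = nn.Conv2d(64, 64, 4, 2, 1)           # learnable 2x downsampling (as in D below)
    up = nn.ConvTranspose2d(64, 64, 4, 2, 1)    # learnable 2x upsampling (as in G below)
    print(pool(x).shape)   # torch.Size([1, 64, 16, 16])
    print(conv(x).shape)   # torch.Size([1, 64, 16, 16])
    print(up(x).shape)     # torch.Size([1, 64, 64, 64])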

    Network architecture:

    (Figure: DCGAN network architecture)

    Training details:

    • Preprocessing: scale the images to the [-1, 1] range of tanh (see the range check after this list)

    • Minibatch training with batch size = 128

    • All weights initialized from a normal distribution with mean 0 and standard deviation 0.02

    • LeakyReLU slope of 0.2

    • Adam optimizer with initial learning rate 0.0002 and beta_1 = 0.5
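
    A quick check of the preprocessing bullet (my own sketch): ToTensor already maps pixels into [0, 1], and Normalize with mean 0.5 and std 0.5 then maps them into [-1, 1], matching the range of the generator's final tanh:

    import torch
    from torchvision import transforms

    normalize = transforms.Normalize((0.5, ), (0.5, ))  # computes (x - 0.5) / 0.5
    x = torch.rand(1, 64, 64)                           # ToTensor output lies in [0, 1]
    y = normalize(x)                                    # now lies in [-1, 1]
    print(y.min().item(), y.max().item())               # roughly -1.0 and 1.0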

    Code implementation (PyTorch):

    import torch
    import torchvision
    import torch.nn as nn
    import torch.backends.cudnn as cudnn
    from torchvision import datasets, transforms
    import torchvision.utils as vutils
    import random
    
    manualSeed = random.randint(1, 10000)
    print("Random Seed: ", manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    
    cudnn.benchmark = True
    
    num_epoch = 40
    learning_rate = 0.0002
    z_dimension = 100
    nc = 1  # number of image channels (MNIST is grayscale)
    ndf = 64  # base number of feature maps in the discriminator
    ngf = 64  # base number of feature maps in the generator
    batchSize = 64
    beta = 0.5
    
    # data_loader
    img_size = 64
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, ), std=(0.5, ))  # single channel for MNIST
    ])
    data_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root='data', download=True, transform=transform),
        batch_size=batchSize,
        shuffle=True,
        num_workers=2)
    
    # data_loader = datasets.CIFAR10(root='data',download=True,transform=transform)
    
    
    ##Discriminator
    class Discriminator(nn.Module):
        def __init__(self):
            super(Discriminator, self).__init__()
            self.dis = nn.Sequential(
                ##input is nc*64*64
                nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
                nn.LeakyReLU(0.2, inplace=True),
    
                ## state size ndf*32*32
                nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * 2),
                nn.LeakyReLU(0.2, inplace=True),
    
                ##state size (ndf*2)*16*16
                nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * 4),
                nn.LeakyReLU(0.2, inplace=True),
    
                ##state size (ndf*4)*8*8
                nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * 8),
                nn.LeakyReLU(0.2, inplace=True),
    
                ##state size (ndf*8)*4*4
                nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
                nn.Sigmoid())
    
        def forward(self, x):
            x = self.dis(x)
            return x.view(-1, 1).squeeze(1)
    
    
    ##Generator
    class Generator(nn.Module):
        def __init__(self):
            super(Generator, self).__init__()
            self.gen = nn.Sequential(
    
                ##input is z_dimension
                nn.ConvTranspose2d(z_dimension, ngf * 8, 4, 1, 0, bias=False),
                nn.BatchNorm2d(ngf * 8),
                nn.ReLU(True),
    
                ##state size (ngf*8)*4*4
                nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf * 4),
                nn.ReLU(True),
    
                ##state size (ngf*4)*8*8
                nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf * 2),
                nn.ReLU(True),
    
                ##state size (ngf*2)*16*16
                nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf),
                nn.ReLU(True),
    
                ##state size ngf*32*32
                nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
                nn.Tanh(),
            )
    
        def forward(self, x):
            x = self.gen(x)
            return x
    
    
    # custom weights initialization called on netG and netD
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)
    
    
    D = Discriminator()
    D.apply(weights_init)
    G = Generator()
    G.apply(weights_init)
    G.cuda()
    D.cuda()
    criterion = nn.BCELoss()
    d_optimizer = torch.optim.Adam(
        D.parameters(), lr=learning_rate, betas=(beta, 0.999))
    g_optimizer = torch.optim.Adam(
        G.parameters(), lr=learning_rate, betas=(beta, 0.999))
    
    
    # print the network architecture
    def print_network(net):
        num_params = 0
        for param in net.parameters():
            num_params += param.numel()
        print(net)
        print('Total number of parameters: %d' % num_params)
    
    
    print('---------- Networks architecture -------------')
    print_network(G)
    print_network(D)
    print('-----------------------------------------------')
    
    # r_label = torch.ones(batchSize, 1).cuda()
    # f_label = torch.zeros(batchSize, 1).cuda()
    fixed_noise = torch.randn(batchSize, z_dimension, 1, 1).cuda()
    print('training start!!')
    ##start training
    for epoch in range(num_epoch):
        for iter, data in enumerate(data_loader, 0):
            x_ = data[0].cuda()
            b_size = x_.size(0)  # the last batch can be smaller than batchSize
            r_label = torch.full((b_size, ), 1.0).cuda()  # label 1 for real images
            f_label = torch.full((b_size, ), 0.0).cuda()  # label 0 for fake images
            ## train the Discriminator
            D.zero_grad()
    
            ##compute the loss of real_image
            real_out = D(x_)
            d_loss_real = criterion(real_out, r_label)
            d_loss_real.backward()
            D_x = real_out.mean().item()
    
            ##compute the loss of fake_image
            noise = torch.randn(b_size, z_dimension, 1, 1).cuda()
            fake_img = G(noise)
            fake_out = D(fake_img.detach())
            d_loss_fake = criterion(fake_out, f_label)
            d_loss_fake.backward()
            D_G_z1 = fake_out.mean().item()
    
            ## optimize D (gradients were accumulated by the two backward() calls above)
            d_loss = d_loss_fake + d_loss_real  # combined loss, for logging
            d_optimizer.step()
    
            ## train the Generator
            G.zero_grad()
            output = D(fake_img)
            g_loss = criterion(output, r_label)
    
            ##bp and optimize
            g_loss.backward()
            D_G_z2 = output.mean().item()
            g_optimizer.step()
    
            if ((iter + 1) % 100) == 0:
                vutils.save_image(x_, './real_samples.png', normalize=True)
                fake = G(fixed_noise)
                vutils.save_image(
                    fake.detach(),
                    './fake_samples_epoch_%03d.png' % epoch,
                    normalize=True)
                print(
                    '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
                    % (epoch, num_epoch, iter, len(data_loader), d_loss.item(),
                       g_loss.item(), D_x, D_G_z1, D_G_z2))
        if (epoch+1)%5 == 0:
            addressG = '_G' + str(epoch) + '.pkl'
            addressD = '_D' + str(epoch) + '.pkl'
            torch.save(G.state_dict(), addressG)
            torch.save(D.state_dict(), addressD)
    print("Training finish!... save training results")
    torch.save(G.state_dict(), '_G.pkl')
    torch.save(D.state_dict(), '_D.pkl')
    print("well done!")
    

    Training results:

    (Figures: generated MNIST samples at epoch = 10, 20, 30, 40)

    Result analysis:

    As noted above, the results are not very satisfactory. By epoch 20 the digits have taken rough shape, but further training brings no clear improvement, and the generator is strongly biased (digits such as 1 and 3 never appear, a sign of mode collapse). The next step is to read up on the Earth Mover's distance discussed in the WGAN paper to quantify the generator's output quality, and to look for the cause of the poor training results.
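
    As a rough preview of that metric (my own illustration, not from the post): for one-dimensional samples, scipy.stats.wasserstein_distance computes exactly this Earth Mover's distance, so the pixel-intensity distributions of real and generated batches can already be compared this way:

    import numpy as np
    from scipy.stats import wasserstein_distance

    # stand-in arrays for flattened pixel intensities (hypothetical data)
    real_pixels = np.random.uniform(-1, 1, 10000)
    fake_pixels = np.random.normal(0, 0.5, 10000)
    print(wasserstein_distance(real_pixels, fake_pixels))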
