美文网首页
Pytorch 验证码识别

Pytorch 验证码识别

作者: dawsonenjoy | 来源:发表于2019-11-09 12:30 被阅读0次

    思路

    网上有很多验证码的生成方式,而在python中最常用的验证码生成模块就是:captcha 和 gvcode,前者的示例可以参考:链接,这里用后者生成验证码并识别(其实原理都一样就是了...)

    导入模块

    如果在colab环境下,需要先安装验证码生成模块,代码如下:

    # !pip install graphic-verification-code
    

    然后导入所有需要的模块

    import torch
    from torch import nn
    import torch.nn.functional as F
    import numpy as np
    import matplotlib.pyplot as plt
    import gvcode
    import random
    

    初始化定义

    这里首先配置一下GPU,然后还有所有的验证码字符集,以及训练的batch等,代码如下:

    # Prefer the GPU when one is available, otherwise run on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Captcha alphabet: digits, lowercase and uppercase letters (62 symbols),
    # built from character-code ranges instead of a hand-typed literal.
    all_char = (
        [chr(c) for c in range(ord('0'), ord('9') + 1)]
        + [chr(c) for c in range(ord('a'), ord('z') + 1)]
        + [chr(c) for c in range(ord('A'), ord('Z') + 1)]
    )
    features = len(all_char)  # number of classes per captcha position
    batch_size = 32           # samples per training step
    

    数据预处理

    为了节约内存,这里验证码使用生成器生成,生成器定义如下:

    def gen(batch_size=32):
        """Generate one batch of captcha images with one-hot labels.

        Returns:
            data_x: float tensor of shape (batch_size, 3, 170, 80) -- the
                in-place transpose below swaps H/W together with the channels.
            data_y: float tensor of shape (4, batch_size, features), one
                one-hot slice per captcha character position.
            data_img: list of (PIL image, label string) pairs for inspection.
        """
        data_img = []
        data_x = torch.zeros(batch_size, 80, 170, 3)
        data_y = torch.zeros(batch_size, 4, features)
        for i in range(batch_size):
            # random.randint is inclusive on both ends, so a valid RGB
            # component ranges over randint(0, 255) -- 256 was an off-by-one.
            fg_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            # Cap the background at 150 so it stays darker than the text.
            bg_color = (random.randint(0, 150), random.randint(0, 150), random.randint(0, 150))
            img, chars = gvcode.generate(size=(170, 80), chars=all_char, bg_color=bg_color, fg_color=fg_color)
            data_img.append((img, chars))
            data_x[i] = torch.tensor(np.array(img))
            for j in range(4):
                # One-hot encode each of the 4 captcha characters.
                data_y[i][j][all_char.index(chars[j])] = 1
        # (batch, H, W, C) -> (batch, C, W, H): Conv2d expects channels first.
        data_x.transpose_(1, 3)
        # (batch, 4, features) -> (4, batch, features): one (batch, features)
        # slice per captcha position, matching the model's four outputs.
        data_y.transpose_(0, 1)
        return data_x, data_y, data_img
    
    # 测试生成器
    # x, y, _ = gen(32)
    # x.shape, y.shape
    # plt.imshow(x[0].transpose_(0, 2))
    # plt.show()
    

    定义网络模型

    模型的前面先用几次卷积提取特征,最后由于验证码有4个值,所以用4个全连接分别训练,最后用softmax进行判断,代码如下:

    class Net(nn.Module):
        """CNN captcha recognizer: shared conv trunk + 4 classifier heads.

        Input:  (batch, 3, W, H) image tensor sized so the trunk pools down
                to 1536 flat features (matches the 170x80 captchas of gen()).
        Output: tuple of 4 logit tensors, each (batch, features), one per
                captcha character position.
        """
        def __init__(self):
            super(Net, self).__init__()
            # Four conv stages; each pair of convs is followed by 2x2 max
            # pooling in forward().
            self.conv1 = nn.Conv2d(3, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 32, 3, 1)
            self.conv3 = nn.Conv2d(32, 64, 3, 1)
            self.conv4 = nn.Conv2d(64, 64, 3, 1)
            self.conv5 = nn.Conv2d(64, 128, 3, 1)
            self.conv6 = nn.Conv2d(128, 128, 3, 1)
            self.conv7 = nn.Conv2d(128, 256, 3, 1)
            self.conv8 = nn.Conv2d(256, 256, 3, 1)
            self.maxpool1 = nn.MaxPool2d(2, 2)
            self.maxpool2 = nn.MaxPool2d(2, 2)
            self.maxpool3 = nn.MaxPool2d(2, 2)
            self.maxpool4 = nn.MaxPool2d(2, 2)
            # Multi-output model: 4 heads, one per captcha character, each
            # producing (batch_size, features) logits.
            self.linear1 = nn.Linear(1536, features)
            self.linear2 = nn.Linear(1536, features)
            self.linear3 = nn.Linear(1536, features)
            self.linear4 = nn.Linear(1536, features)
            # Explicit dim avoids the deprecated implicit-dim warning; this
            # layer is unused when training with CrossEntropyLoss, which
            # applies log-softmax internally.
            self.softmax = nn.Softmax(dim=1)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = self.maxpool1(x)
            x = F.relu(self.conv3(x))
            x = F.relu(self.conv4(x))
            x = self.maxpool2(x)
            x = F.relu(self.conv5(x))
            x = F.relu(self.conv6(x))
            x = self.maxpool3(x)
            x = F.relu(self.conv7(x))
            x = F.relu(self.conv8(x))
            x = self.maxpool4(x)
            # Flatten per sample only: the original torch.flatten(x) merged
            # the batch dimension and relied on reshape(-1, 1536) to undo it.
            x = torch.flatten(x, 1)

            # No softmax here: the heads return raw logits, which is what
            # CrossEntropyLoss expects.
            x1 = self.linear1(x)
            x2 = self.linear2(x)
            x3 = self.linear3(x)
            x4 = self.linear4(x)
            return x1, x2, x3, x4

    model = Net().to(device)
    

    定义损失函数和优化器

    # MSE against the one-hot targets; the commented CrossEntropyLoss (with
    # argmax'd integer targets in the training loop) converges faster.
    loss_fun = nn.MSELoss()
    # loss_fun = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.0001)
    

    训练模型

    model.train()
    li_loss = []  # per-epoch scalar loss values, for plotting
    for epoch in range(1, 5000):
        x, y, _ = gen(batch_size)
        output1, output2, output3, output4 = model(x.to(device))
        # One loss per captcha position; y[k] has shape (batch_size, features).
        loss1 = loss_fun(output1, y[0].to(device))
        loss2 = loss_fun(output2, y[1].to(device))
        loss3 = loss_fun(output3, y[2].to(device))
        loss4 = loss_fun(output4, y[3].to(device))

        # With CrossEntropyLoss the targets must be class indices instead:
        # loss1 = loss_fun(output1, y[0].argmax(dim=1).to(device))
        # loss2 = loss_fun(output2, y[1].argmax(dim=1).to(device))
        # loss3 = loss_fun(output3, y[2].argmax(dim=1).to(device))
        # loss4 = loss_fun(output4, y[3].argmax(dim=1).to(device))
        loss = loss1 + loss2 + loss3 + loss4
        optim.zero_grad()
        loss.backward()
        optim.step()
        print("epoch:{}, loss1:{}, loss2:{}, loss3:{}, loss4:{}".format(epoch, loss1, loss2, loss3, loss4))
        torch.save(model.state_dict(), "gvcode_{}.mdl".format(batch_size))
        # Append a detached Python float: storing the loss tensor itself keeps
        # every epoch's autograd graph alive (memory leak), and plt.plot
        # cannot handle CUDA tensors.
        li_loss.append(loss.item())

        if epoch % 30 == 0:
            # Inspect the loss trend every 30 epochs.
            plt.plot(li_loss)
            plt.show()
    

    loss变化趋势(loss为交叉熵时)如下:


    测试模型

    产生batch的数据测试看看结果如何,代码如下:

    x, y, z = gen(100)
    model.eval()
    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
        output1, output2, output3, output4 = model(x.to(device))
    # Decode each head's argmax back into its character.
    li1 = [all_char[each] for each in output1.argmax(dim=1)]
    li2 = [all_char[each] for each in output2.argmax(dim=1)]
    li3 = [all_char[each] for each in output3.argmax(dim=1)]
    li4 = [all_char[each] for each in output4.argmax(dim=1)]
    # Join the four predicted characters into one captcha string per sample.
    li_code = np.array(["".join(each) for each in zip(li1, li2, li3, li4)])
    # z is a list of (image, label) pairs; column 1 holds the true strings.
    print("acc:", np.sum(li_code == np.array(z)[:, 1]) / len(li_code))
    for i in range(100):
        print(z[i][1], li_code[i])
    

    测试结果如下:



    可以看到精确度在80%左右

    完整代码示例-gvcode识别

    # 安装模块
    # !pip install graphic-verification-code
    
    # -----------------------------
    # 导入模块
    import torch
    from torch import nn
    import torch.nn.functional as F
    import numpy as np
    import matplotlib.pyplot as plt
    import gvcode
    import random
    
    # -----------------------------
    # 设置gpu
    # Prefer the GPU when one is available, otherwise run on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.cuda.is_available()

    # -----------------------------
    # Captcha character set and basic parameters: digits plus lowercase and
    # uppercase letters (62 symbols), built from character-code ranges.
    all_char = (
        [chr(c) for c in range(ord('0'), ord('9') + 1)]
        + [chr(c) for c in range(ord('a'), ord('z') + 1)]
        + [chr(c) for c in range(ord('A'), ord('Z') + 1)]
    )

    features = len(all_char)  # number of classes per captcha position
    batch_size = 32           # samples per training step
    
    # -----------------------------
    # 定义验证码生成器
    def gen(batch_size=32):
        """Generate one batch of captcha images with one-hot labels.

        Returns:
            data_x: float tensor of shape (batch_size, 3, 170, 80) -- the
                in-place transpose below swaps H/W together with the channels.
            data_y: float tensor of shape (4, batch_size, features), one
                one-hot slice per captcha character position.
            data_img: list of (PIL image, label string) pairs for inspection.
        """
        data_img = []
        data_x = torch.zeros(batch_size, 80, 170, 3)
        data_y = torch.zeros(batch_size, 4, features)
        for i in range(batch_size):
            # random.randint is inclusive on both ends, so a valid RGB
            # component ranges over randint(0, 255) -- 256 was an off-by-one.
            fg_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            # Cap the background at 150 so it stays darker than the text.
            bg_color = (random.randint(0, 150), random.randint(0, 150), random.randint(0, 150))
            img, chars = gvcode.generate(size=(170, 80), chars=all_char, bg_color=bg_color, fg_color=fg_color)
            data_img.append((img, chars))
            data_x[i] = torch.tensor(np.array(img))
            for j in range(4):
                # One-hot encode each of the 4 captcha characters.
                data_y[i][j][all_char.index(chars[j])] = 1
        # (batch, H, W, C) -> (batch, C, W, H): Conv2d expects channels first.
        data_x.transpose_(1, 3)
        # (batch, 4, features) -> (4, batch, features): one (batch, features)
        # slice per captcha position, matching the model's four outputs.
        data_y.transpose_(0, 1)
        return data_x, data_y, data_img
    
    # 测试生成器
    # x, y, _ = gen(32)
    # x.shape, y.shape
    # plt.imshow(x[0].transpose_(0, 2))
    # plt.show()
    
    # -----------------------------
    # 定义网络
    class Net(nn.Module):
        """CNN captcha recognizer: shared conv trunk + 4 classifier heads.

        Input:  (batch, 3, W, H) image tensor sized so the trunk pools down
                to 1536 flat features (matches the 170x80 captchas of gen()).
        Output: tuple of 4 logit tensors, each (batch, features), one per
                captcha character position.
        """
        def __init__(self):
            super(Net, self).__init__()
            # Four conv stages; each pair of convs is followed by 2x2 max
            # pooling in forward().
            self.conv1 = nn.Conv2d(3, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 32, 3, 1)
            self.conv3 = nn.Conv2d(32, 64, 3, 1)
            self.conv4 = nn.Conv2d(64, 64, 3, 1)
            self.conv5 = nn.Conv2d(64, 128, 3, 1)
            self.conv6 = nn.Conv2d(128, 128, 3, 1)
            self.conv7 = nn.Conv2d(128, 256, 3, 1)
            self.conv8 = nn.Conv2d(256, 256, 3, 1)
            self.maxpool1 = nn.MaxPool2d(2, 2)
            self.maxpool2 = nn.MaxPool2d(2, 2)
            self.maxpool3 = nn.MaxPool2d(2, 2)
            self.maxpool4 = nn.MaxPool2d(2, 2)
            # Multi-output model: 4 heads, one per captcha character, each
            # producing (batch_size, features) logits.
            self.linear1 = nn.Linear(1536, features)
            self.linear2 = nn.Linear(1536, features)
            self.linear3 = nn.Linear(1536, features)
            self.linear4 = nn.Linear(1536, features)
            # Explicit dim avoids the deprecated implicit-dim warning; this
            # layer is unused when training with CrossEntropyLoss, which
            # applies log-softmax internally.
            self.softmax = nn.Softmax(dim=1)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = self.maxpool1(x)
            x = F.relu(self.conv3(x))
            x = F.relu(self.conv4(x))
            x = self.maxpool2(x)
            x = F.relu(self.conv5(x))
            x = F.relu(self.conv6(x))
            x = self.maxpool3(x)
            x = F.relu(self.conv7(x))
            x = F.relu(self.conv8(x))
            x = self.maxpool4(x)
            # Flatten per sample only: the original torch.flatten(x) merged
            # the batch dimension and relied on reshape(-1, 1536) to undo it.
            x = torch.flatten(x, 1)

            # No softmax here: the heads return raw logits, which is what
            # CrossEntropyLoss expects.
            x1 = self.linear1(x)
            x2 = self.linear2(x)
            x3 = self.linear3(x)
            x4 = self.linear4(x)
            return x1, x2, x3, x4

    model = Net().to(device)
    # model
    
    # -----------------------------
    # 损失函数和优化器
    # MSE against the one-hot targets; the commented CrossEntropyLoss (with
    # argmax'd integer targets in the training loop) converges faster.
    loss_fun = nn.MSELoss()
    # loss_fun = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.0001)
    
    # -----------------------------
    # 训练数据
    model.train()
    li_loss = []  # per-epoch scalar loss values, for plotting
    for epoch in range(1, 5000):
        x, y, _ = gen(batch_size)
        output1, output2, output3, output4 = model(x.to(device))
        # One loss per captcha position; y[k] has shape (batch_size, features).
        loss1 = loss_fun(output1, y[0].to(device))
        loss2 = loss_fun(output2, y[1].to(device))
        loss3 = loss_fun(output3, y[2].to(device))
        loss4 = loss_fun(output4, y[3].to(device))

        # With CrossEntropyLoss the targets must be class indices instead:
        # loss1 = loss_fun(output1, y[0].argmax(dim=1).to(device))
        # loss2 = loss_fun(output2, y[1].argmax(dim=1).to(device))
        # loss3 = loss_fun(output3, y[2].argmax(dim=1).to(device))
        # loss4 = loss_fun(output4, y[3].argmax(dim=1).to(device))
        loss = loss1 + loss2 + loss3 + loss4
        optim.zero_grad()
        loss.backward()
        optim.step()
        print("epoch:{}, loss1:{}, loss2:{}, loss3:{}, loss4:{}".format(epoch, loss1, loss2, loss3, loss4))
        torch.save(model.state_dict(), "gvcode_{}.mdl".format(batch_size))
        # Append a detached Python float: storing the loss tensor itself keeps
        # every epoch's autograd graph alive (memory leak), and plt.plot
        # cannot handle CUDA tensors.
        li_loss.append(loss.item())

        if epoch % 30 == 0:
            # Inspect the loss trend every 30 epochs.
            plt.plot(li_loss)
            plt.show()
    
    # -----------------------------
    # 测试数据
    # batch_size = 32
    x, y, z = gen(100)
    model.eval()
    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
        output1, output2, output3, output4 = model(x.to(device))
    # Decode each head's argmax back into its character.
    li1 = [all_char[each] for each in output1.argmax(dim=1)]
    li2 = [all_char[each] for each in output2.argmax(dim=1)]
    li3 = [all_char[each] for each in output3.argmax(dim=1)]
    li4 = [all_char[each] for each in output4.argmax(dim=1)]
    # Join the four predicted characters into one captcha string per sample.
    li_code = np.array(["".join(each) for each in zip(li1, li2, li3, li4)])
    # z is a list of (image, label) pairs; column 1 holds the true strings.
    print("acc:", np.sum(li_code == np.array(z)[:, 1]) / len(li_code))
    for i in range(100):
        print(z[i][1], li_code[i])
    

    模型优化-BatchNormalization的使用

    使用BN的好处可以参考:https://www.jianshu.com/p/45af48acce40
    这里修改网络模型,在中间添加BN层试试,代码如下:

    class Net(nn.Module):
        """CNN captcha recognizer with BatchNorm after every pooling stage.

        Same interface as the plain model: input (batch, 3, W, H) images,
        output 4 logit tensors of shape (batch, features), one per captcha
        character position. The BatchNorm layers speed up convergence and
        tolerate larger learning rates.
        """
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 32, 3, 1)
            self.conv3 = nn.Conv2d(32, 64, 3, 1)
            self.conv4 = nn.Conv2d(64, 64, 3, 1)
            self.conv5 = nn.Conv2d(64, 128, 3, 1)
            self.conv6 = nn.Conv2d(128, 128, 3, 1)
            self.conv7 = nn.Conv2d(128, 256, 3, 1)
            self.conv8 = nn.Conv2d(256, 256, 3, 1)
            self.maxpool1 = nn.MaxPool2d(2, 2)
            self.maxpool2 = nn.MaxPool2d(2, 2)
            self.maxpool3 = nn.MaxPool2d(2, 2)
            self.maxpool4 = nn.MaxPool2d(2, 2)
            # One BatchNorm layer per conv stage, sized to that stage's
            # output channel count.
            self.bn1 = nn.BatchNorm2d(32)
            self.bn2 = nn.BatchNorm2d(64)
            self.bn3 = nn.BatchNorm2d(128)
            self.bn4 = nn.BatchNorm2d(256)
            # Multi-output model: 4 heads, one per captcha character, each
            # producing (batch_size, features) logits.
            self.linear1 = nn.Linear(1536, features)
            self.linear2 = nn.Linear(1536, features)
            self.linear3 = nn.Linear(1536, features)
            self.linear4 = nn.Linear(1536, features)
            # Explicit dim avoids the deprecated implicit-dim warning; this
            # layer is unused when training with CrossEntropyLoss, which
            # applies log-softmax internally.
            self.softmax = nn.Softmax(dim=1)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            x = F.relu(self.conv2(x))
            x = self.maxpool1(x)
            x = self.bn1(x)
            x = F.relu(self.conv3(x))
            x = F.relu(self.conv4(x))
            x = self.maxpool2(x)
            x = self.bn2(x)
            x = F.relu(self.conv5(x))
            x = F.relu(self.conv6(x))
            x = self.maxpool3(x)
            x = self.bn3(x)
            x = F.relu(self.conv7(x))
            x = F.relu(self.conv8(x))
            x = self.maxpool4(x)
            x = self.bn4(x)
            # Flatten per sample only: the original torch.flatten(x) merged
            # the batch dimension and relied on reshape(-1, 1536) to undo it.
            x = torch.flatten(x, 1)

            # No softmax here: the heads return raw logits, which is what
            # CrossEntropyLoss expects.
            x1 = self.linear1(x)
            x2 = self.linear2(x)
            x3 = self.linear3(x)
            x4 = self.linear4(x)
            return x1, x2, x3, x4
    

    重新训练模型,训练1200轮(loss为交叉熵时)的变化趋势如下:



    可以发现loss的下降速度显著提高。在该模型中添加了BN层以后,其不但收敛加快,而且对于学习率的要求也没有那么严格(可以再提高些学习率试试,比如从0.0001提高到0.003),同时也提高了模型的泛化能力(再去训练别的模型时效果也能很好),总之在适合的情况使用BN,将会带来很好的效果

    相关文章

      网友评论

          本文标题:Pytorch 验证码识别

          本文链接:https://www.haomeiwen.com/subject/ulnibctx.html