Approach
There are many ways to generate CAPTCHAs; in Python the two most commonly used CAPTCHA-generation modules are captcha and gvcode. An example using the former can be found in the linked article; here the latter is used to generate the CAPTCHAs we recognize (the underlying idea is the same either way).
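To get a feel for what gvcode produces, here is a minimal sketch using the same gvcode.generate call signature as the rest of this post (the color values are arbitrary; generate returns a PIL image together with the string drawn on it):
import gvcode

# generate one 170x80 CAPTCHA; returns (PIL.Image, str)
img, chars = gvcode.generate(size=(170, 80), chars=list("0123456789"),
                             bg_color=(30, 30, 30), fg_color=(220, 220, 0))
print(chars)              # the characters drawn on the image, e.g. "3907"
img.save("sample_captcha.png")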
Importing modules
In a Colab environment, first install the CAPTCHA-generation module:
# !pip install graphic-verification-code
Then import everything needed:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import gvcode
import random
Initial setup
First pick the device (GPU if available), then define the full CAPTCHA character set and the basic training parameters such as the batch size:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.is_available()
all_char = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
features = len(all_char)
batch_size = 32
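To make the label format concrete before the generator below, a small sketch of how one 4-character code maps to a (4, features) one-hot tensor and back; the encode/decode helpers here are illustrative only, not part of the training code:
# hypothetical helpers mirroring the encoding the generator performs
def encode(chars):
    y = torch.zeros(len(chars), features)
    for j, c in enumerate(chars):
        y[j][all_char.index(c)] = 1  # a 1 at the character's index
    return y

def decode(y):
    return "".join(all_char[i] for i in y.argmax(dim=1))

y = encode("aB3x")
print(y.shape)    # torch.Size([4, 62])
print(decode(y))  # aB3x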
Data preprocessing
To save memory, CAPTCHAs are produced on the fly by a generator function:
def gen(batch_size=32):
    data_img = []
    data_x = torch.zeros(batch_size, 80, 170, 3)
    data_y = torch.zeros(batch_size, 4, features)
    for i in range(batch_size):
        # random RGB colors; randint is inclusive on both ends, so the upper bound is 255
        fg_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        bg_color = (random.randint(0, 150), random.randint(0, 150), random.randint(0, 150))
        img, chars = gvcode.generate(size=(170, 80), chars=all_char, bg_color=bg_color, fg_color=fg_color)
        data_img.append((img, chars))
        data_x[i] = torch.tensor(np.array(img))
        for j in range(4):
            # one-hot encode each of the 4 CAPTCHA characters
            data_y[i][j][all_char.index(chars[j])] = 1
    # to (batch_size, 3, 170, 80): conv layers expect channels before the spatial dims
    data_x.transpose_(1, 3)
    # to (4, batch_size, features): one (batch_size, features) slice per character position,
    # matching the model's 4 outputs
    data_y.transpose_(0, 1)
    return data_x, data_y, data_img
# quick generator test
# x, y, _ = gen(32)
# x.shape, y.shape
# plt.imshow(x[0].transpose(0, 2).to(torch.uint8))
# plt.show()
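Uncommented and extended slightly, a sanity check might look like this (a sketch that just confirms the shapes documented above and eyeballs one sample next to its label):
x, y, imgs = gen(8)
assert x.shape == (8, 3, 170, 80)   # channels-first image batch
assert y.shape == (4, 8, features)  # one (batch, features) slice per position
img, chars = imgs[0]
plt.title(chars)           # ground-truth string as the plot title
plt.imshow(np.array(img))  # show the raw PIL image
plt.show()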
Defining the network
The model first extracts features with a stack of convolutions; since the CAPTCHA has 4 characters, four separate fully connected heads are trained, one per position, and the argmax (equivalently, softmax) over each head picks that position's character:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.conv4 = nn.Conv2d(64, 64, 3, 1)
        self.conv5 = nn.Conv2d(64, 128, 3, 1)
        self.conv6 = nn.Conv2d(128, 128, 3, 1)
        self.conv7 = nn.Conv2d(128, 256, 3, 1)
        self.conv8 = nn.Conv2d(256, 256, 3, 1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # multi-output model: 4 heads, one per character, each of shape (batch_size, features)
        self.linear1 = nn.Linear(1536, features)
        self.linear2 = nn.Linear(1536, features)
        self.linear3 = nn.Linear(1536, features)
        self.linear4 = nn.Linear(1536, features)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.maxpool3(x)
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.maxpool4(x)
        x = torch.flatten(x, start_dim=1)  # (batch_size, 256*6*1) = (batch_size, 1536)
        # x1 = self.softmax(self.linear1(x))
        # x2 = self.softmax(self.linear2(x))
        # x3 = self.softmax(self.linear3(x))
        # x4 = self.softmax(self.linear4(x))
        # no softmax needed when training with cross-entropy (it is built in);
        # apply self.softmax if training against one-hot targets with MSE
        x1 = self.linear1(x)
        x2 = self.linear2(x)
        x3 = self.linear3(x)
        x4 = self.linear4(x)
        return x1, x2, x3, x4

model = Net().to(device)
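Where does the 1536 in the linear layers come from? Each 3x3 convolution (stride 1, no padding) trims 2 pixels from each spatial dimension and each pooling halves them (rounding down), so the 170x80 input shrinks to 256 channels of 6x1, and 256 * 6 * 1 = 1536. Rather than doing the arithmetic by hand, a throwaway dummy forward pass through just the conv/pool stack confirms it:
# push a dummy batch through the feature extractor to read off the flatten size
with torch.no_grad():
    t = torch.zeros(1, 3, 170, 80).to(device)
    for layer in [model.conv1, model.conv2, model.maxpool1,
                  model.conv3, model.conv4, model.maxpool2,
                  model.conv5, model.conv6, model.maxpool3,
                  model.conv7, model.conv8, model.maxpool4]:
        t = layer(t)
    print(t.shape, t.numel())  # torch.Size([1, 256, 6, 1]) 1536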
Defining the loss function and optimizer
MSE against the one-hot targets is used by default; to switch to cross-entropy, swap the commented lines here and in the training loop (cross-entropy expects class indices rather than one-hot vectors, and the loss curves shown later use it):
loss_fun = nn.MSELoss()
# loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.0001)
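The two loss options expect differently shaped targets, which is why the training loop below carries two sets of lines. A quick sketch of the difference using dummy tensors:
logits = torch.randn(batch_size, features)  # raw output of one model head
one_hot = torch.zeros(batch_size, features)
one_hot[torch.arange(batch_size), torch.randint(0, features, (batch_size,))] = 1

# MSE compares logits to the one-hot vectors elementwise
print(nn.MSELoss()(logits, one_hot))
# cross-entropy wants integer class indices and applies log-softmax internally
print(nn.CrossEntropyLoss()(logits, one_hot.argmax(dim=1)))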
Training the model
Each iteration draws a fresh batch from the generator, computes one loss per character position, and backpropagates their sum:
model.train()
li_loss = []
for epoch in range(1, 5000):
    x, y, _ = gen(batch_size)
    output1, output2, output3, output4 = model(x.to(device))
    # each output/target pair has shape (batch_size, features)
    loss1 = loss_fun(output1, y[0].to(device))
    loss2 = loss_fun(output2, y[1].to(device))
    loss3 = loss_fun(output3, y[2].to(device))
    loss4 = loss_fun(output4, y[3].to(device))
    # with nn.CrossEntropyLoss, pass class indices instead:
    # loss1 = loss_fun(output1, y[0].argmax(dim=1).to(device))
    # loss2 = loss_fun(output2, y[1].argmax(dim=1).to(device))
    # loss3 = loss_fun(output3, y[2].argmax(dim=1).to(device))
    # loss4 = loss_fun(output4, y[3].argmax(dim=1).to(device))
    loss = loss1 + loss2 + loss3 + loss4
    optim.zero_grad()
    loss.backward()
    optim.step()
    print("epoch:{}, loss1:{}, loss2:{}, loss3:{}, loss4:{}".format(epoch, loss1, loss2, loss3, loss4))
    torch.save(model.state_dict(), "gvcode_{}.mdl".format(batch_size))
    li_loss.append(loss.item())  # .item() detaches the value so the graph can be freed
    if epoch % 30 == 0:
        # check the loss trend every 30 iterations
        plt.plot(li_loss)
        plt.show()
The loss curve during training (with cross-entropy loss) looked like this: [figure omitted]
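Because the loop saves a checkpoint every iteration, the model can be reloaded later (for further training or for the test below) from the saved state dict; a minimal sketch, assuming batch_size was 32 when the file was written:
model = Net().to(device)
model.load_state_dict(torch.load("gvcode_32.mdl", map_location=device))
model.eval()  # switch to inference mode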
Testing the model
Generate a batch of 100 fresh CAPTCHAs and check the results:
x, y, z = gen(100)
model.eval()
with torch.no_grad():
    output1, output2, output3, output4 = model(x.to(device))
li1 = [all_char[each] for each in output1.argmax(dim=1)]
li2 = [all_char[each] for each in output2.argmax(dim=1)]
li3 = [all_char[each] for each in output3.argmax(dim=1)]
li4 = [all_char[each] for each in output4.argmax(dim=1)]
li_code = np.array(["".join(each) for each in zip(li1, li2, li3, li4)])
truth = np.array([chars for _, chars in z])  # just the label strings from the (img, chars) pairs
print("acc:", np.sum(li_code == truth) / len(li_code))
for i in range(100):
    print(z[i][1], li_code[i])
The test output is shown below: [output omitted]
The accuracy comes out at roughly 80%+.
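Wrapped up as a helper, recognizing a single PIL image might look like the sketch below; predict is not part of the original code, just a convenience built from the same steps as the test above:
def predict(img):
    # one 170x80 PIL CAPTCHA image -> predicted 4-character string
    t = torch.tensor(np.array(img), dtype=torch.float32)  # (80, 170, 3)
    t = t.permute(2, 1, 0).unsqueeze(0).to(device)        # (1, 3, 170, 80), as in gen()
    with torch.no_grad():
        outputs = model(t)
    return "".join(all_char[o.argmax(dim=1).item()] for o in outputs)

img, chars = gvcode.generate(size=(170, 80), chars=all_char,
                             bg_color=(0, 0, 0), fg_color=(255, 255, 0))
print(chars, predict(img))  # ground truth vs prediction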
Complete code example: gvcode recognition
# install the module
# !pip install graphic-verification-code
# -----------------------------
# import modules
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import gvcode
import random
# -----------------------------
# device setup (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.is_available()
# -----------------------------
# CAPTCHA character set and basic parameters
all_char = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
features = len(all_char)
batch_size = 32
# -----------------------------
# CAPTCHA generator
def gen(batch_size=32):
    data_img = []
    data_x = torch.zeros(batch_size, 80, 170, 3)
    data_y = torch.zeros(batch_size, 4, features)
    for i in range(batch_size):
        # random RGB colors; randint is inclusive on both ends, so the upper bound is 255
        fg_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        bg_color = (random.randint(0, 150), random.randint(0, 150), random.randint(0, 150))
        img, chars = gvcode.generate(size=(170, 80), chars=all_char, bg_color=bg_color, fg_color=fg_color)
        data_img.append((img, chars))
        data_x[i] = torch.tensor(np.array(img))
        for j in range(4):
            # one-hot encode each of the 4 CAPTCHA characters
            data_y[i][j][all_char.index(chars[j])] = 1
    # to (batch_size, 3, 170, 80): conv layers expect channels before the spatial dims
    data_x.transpose_(1, 3)
    # to (4, batch_size, features): one (batch_size, features) slice per character position
    data_y.transpose_(0, 1)
    return data_x, data_y, data_img
# quick generator test
# x, y, _ = gen(32)
# x.shape, y.shape
# plt.imshow(x[0].transpose(0, 2).to(torch.uint8))
# plt.show()
# -----------------------------
# network definition
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.conv4 = nn.Conv2d(64, 64, 3, 1)
        self.conv5 = nn.Conv2d(64, 128, 3, 1)
        self.conv6 = nn.Conv2d(128, 128, 3, 1)
        self.conv7 = nn.Conv2d(128, 256, 3, 1)
        self.conv8 = nn.Conv2d(256, 256, 3, 1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # multi-output model: 4 heads, one per character, each of shape (batch_size, features)
        self.linear1 = nn.Linear(1536, features)
        self.linear2 = nn.Linear(1536, features)
        self.linear3 = nn.Linear(1536, features)
        self.linear4 = nn.Linear(1536, features)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.maxpool3(x)
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.maxpool4(x)
        x = torch.flatten(x, start_dim=1)  # (batch_size, 1536)
        # x1 = self.softmax(self.linear1(x))
        # x2 = self.softmax(self.linear2(x))
        # x3 = self.softmax(self.linear3(x))
        # x4 = self.softmax(self.linear4(x))
        # no softmax needed when training with cross-entropy;
        # apply self.softmax if training against one-hot targets with MSE
        x1 = self.linear1(x)
        x2 = self.linear2(x)
        x3 = self.linear3(x)
        x4 = self.linear4(x)
        return x1, x2, x3, x4

model = Net().to(device)
# model
# -----------------------------
# loss function and optimizer (swap the comments to use cross-entropy)
loss_fun = nn.MSELoss()
# loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.0001)
# -----------------------------
# training loop
model.train()
li_loss = []
for epoch in range(1, 5000):
    x, y, _ = gen(batch_size)
    output1, output2, output3, output4 = model(x.to(device))
    # each output/target pair has shape (batch_size, features)
    loss1 = loss_fun(output1, y[0].to(device))
    loss2 = loss_fun(output2, y[1].to(device))
    loss3 = loss_fun(output3, y[2].to(device))
    loss4 = loss_fun(output4, y[3].to(device))
    # with nn.CrossEntropyLoss, pass class indices instead:
    # loss1 = loss_fun(output1, y[0].argmax(dim=1).to(device))
    # loss2 = loss_fun(output2, y[1].argmax(dim=1).to(device))
    # loss3 = loss_fun(output3, y[2].argmax(dim=1).to(device))
    # loss4 = loss_fun(output4, y[3].argmax(dim=1).to(device))
    loss = loss1 + loss2 + loss3 + loss4
    optim.zero_grad()
    loss.backward()
    optim.step()
    print("epoch:{}, loss1:{}, loss2:{}, loss3:{}, loss4:{}".format(epoch, loss1, loss2, loss3, loss4))
    torch.save(model.state_dict(), "gvcode_{}.mdl".format(batch_size))
    li_loss.append(loss.item())  # .item() so the computation graph can be freed
    if epoch % 30 == 0:
        # check the loss trend every 30 iterations
        plt.plot(li_loss)
        plt.show()
# -----------------------------
# testing
# batch_size = 32
x, y, z = gen(100)
model.eval()
with torch.no_grad():
    output1, output2, output3, output4 = model(x.to(device))
li1 = [all_char[each] for each in output1.argmax(dim=1)]
li2 = [all_char[each] for each in output2.argmax(dim=1)]
li3 = [all_char[each] for each in output3.argmax(dim=1)]
li4 = [all_char[each] for each in output4.argmax(dim=1)]
li_code = np.array(["".join(each) for each in zip(li1, li2, li3, li4)])
truth = np.array([chars for _, chars in z])  # just the label strings from the (img, chars) pairs
print("acc:", np.sum(li_code == truth) / len(li_code))
for i in range(100):
    print(z[i][1], li_code[i])
Model optimization: using BatchNormalization
For the benefits of BN, see: https://www.jianshu.com/p/45af48acce40
Here the network is modified by inserting a BN layer after each pooling stage:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.conv4 = nn.Conv2d(64, 64, 3, 1)
        self.conv5 = nn.Conv2d(64, 128, 3, 1)
        self.conv6 = nn.Conv2d(128, 128, 3, 1)
        self.conv7 = nn.Conv2d(128, 256, 3, 1)
        self.conv8 = nn.Conv2d(256, 256, 3, 1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # batch-norm layers, one after each pooling stage
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        # multi-output model: 4 heads, one per character, each of shape (batch_size, features)
        self.linear1 = nn.Linear(1536, features)
        self.linear2 = nn.Linear(1536, features)
        self.linear3 = nn.Linear(1536, features)
        self.linear4 = nn.Linear(1536, features)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool1(x)
        x = self.bn1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool2(x)
        x = self.bn2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.maxpool3(x)
        x = self.bn3(x)
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.maxpool4(x)
        x = self.bn4(x)
        x = torch.flatten(x, start_dim=1)  # (batch_size, 1536)
        # no softmax needed when training with cross-entropy;
        # apply self.softmax if training against one-hot targets with MSE
        x1 = self.linear1(x)
        x2 = self.linear2(x)
        x3 = self.linear3(x)
        x4 = self.linear4(x)
        return x1, x2, x3, x4
Retraining for 1200 iterations (with cross-entropy loss), the loss trend looked like this: [figure omitted]
The loss now drops noticeably faster. With BN layers added, the model not only converges sooner but also tolerates a wider range of learning rates (try raising it, say from 0.0001 to 0.003), and generalization tends to improve as well (the same trick carries over nicely when training other models). In short, applied in the right places, BN is well worth it.
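Trying that claim only takes changing the optimizer line before rerunning the same training loop, e.g.:
model = Net().to(device)  # the BN version of Net defined above
loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.003)  # up from 0.0001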