- nn.Conv1d & nn.Conv2d
Reference: understanding nn.Conv1d and nn.Conv2d
nn.Conv1d:
Main parameters: in_channels (for text input this is the word-embedding dimension), out_channels, kernel_size, stride, padding.
Input: (batch_size, in_channels, sequence length)
Output: (batch_size, out_channels (i.e. how many convolution kernels are applied), Lout), where for a 1-D convolution Lout = floor((Lin + 2*padding - kernel_size) / stride) + 1 (ignoring dilation); the two-factor form [(n+2p-f)/s+1] x [(n+2p-f)/s+1] only applies to 2-D convolutions.
import torch
import torch.nn as nn

# 16 is in_channels (the word-embedding dimension), 33 is out_channels (how many
# kernels to convolve with), 3 is the kernel size, i.e. each kernel covers
# (3 x embedding_dim) values.
m = nn.Conv1d(16, 33, 3, stride=2)
# Input: N*C*L -> batch_size 20, C = embedding dimension, L = 50 (sentence length).
# Output: N*Cout*Lout -> Cout is out_channels.
# Make sure the embedding dimension is on dim 1 and the sentence length on dim 2;
# if not, reorder the dimensions with permute() or transpose().
input2 = torch.randn(20, 16, 50)
output2 = m(input2)
print(output2.size())
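Plugging this example into the formula above: Lout = floor((50 + 2*0 - 3) / 2) + 1 = 24, so the printed size is torch.Size([20, 33, 24]).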
A helpful diagram for understanding textCNN:
[Figure: textCNN architecture — the embedded sentence is convolved by filters of three different sizes, two feature maps per size, max-pooled, then concatenated.]
As the figure shows, three filters of different sizes convolve over the embedded sentence, with two feature maps per filter size. Each convolution produces one feature map, which is then max-pooled (the pooling window spans the whole output length, as the figure shows), and the pooled values are concatenated to form the feature vector of the whole sentence. A minimal shape walkthrough matching the figure is sketched below.
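The dimensions in this sketch are assumptions chosen only to match the figure (sentence length 7, embedding dimension 5, filter sizes 2/3/4, two feature maps per size):

import torch
import torch.nn as nn
import torch.nn.functional as F

emb = torch.randn(1, 5, 7)                   # (batch, embedding_dim, seq_len)
pooled = []
for k in (2, 3, 4):                          # three filter sizes
    conv = nn.Conv1d(in_channels=5, out_channels=2, kernel_size=k)   # two feature maps per size
    feat = F.relu(conv(emb))                 # (1, 2, 7 - k + 1)
    pooled.append(F.max_pool1d(feat, kernel_size=feat.size(2)))      # max over time -> (1, 2, 1)
sentence_vec = torch.cat(pooled, dim=1).squeeze(2)                   # (1, 6): 2 maps x 3 sizes
print(sentence_vec.shape)

The full CNNlayer module follows.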
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNlayer(nn.Module):
    def __init__(self, vocab_size, embedding_size, kernel_num, kernel_Size, output_size):
        super(CNNlayer, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.kernel_num = kernel_num
        self.kernel_size = kernel_Size
        self.output_size = output_size
        self.embedded = nn.Embedding(self.vocab_size, self.embedding_size)
        self.convs = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(in_channels=embedding_size, out_channels=kernel_num, kernel_size=one),
                nn.ReLU(),
                # With the default stride of 1 and no padding, Lout = max_seq_len - one + 1,
                # so a pooling kernel of that size would cover the whole output length:
                # nn.MaxPool1d(kernel_size=max_seq_len - one + 1)
            )
            for one in kernel_Size
        ])
        # Equivalent explicit version, one nn.Sequential per kernel size:
        # self.conv1 = nn.Sequential(
        #     nn.Conv1d(in_channels=self.embedding_size, out_channels=self.kernel_num, kernel_size=self.kernel_size[0]),
        #     nn.ReLU(),
        #     # nn.MaxPool1d(kernel_size=int(max_seq_len - self.kernel_size[0] + 1))
        # )
        # self.conv2 = nn.Sequential(
        #     nn.Conv1d(in_channels=self.embedding_size, out_channels=self.kernel_num, kernel_size=self.kernel_size[1]),
        #     nn.ReLU(),
        #     # nn.MaxPool1d(kernel_size=int(max_seq_len - self.kernel_size[1] + 1))
        # )
        # self.conv3 = nn.Sequential(
        #     nn.Conv1d(in_channels=self.embedding_size, out_channels=self.kernel_num, kernel_size=self.kernel_size[2]),
        #     nn.ReLU(),
        #     # nn.MaxPool1d(kernel_size=int(max_seq_len - self.kernel_size[2] + 1))
        # )
        self.embedding_dropout = nn.Dropout()
        self.fcdropout = nn.Dropout()
        # in_features of the first linear layer: as the figure shows, the sentence feature is the
        # concatenation of the pooled outputs, i.e. (number of feature maps) x (number of filter sizes).
        in_feature = self.kernel_num * len(self.kernel_size)
        self.linear1 = nn.Linear(in_features=in_feature, out_features=in_feature // 2)
        self.linear2 = nn.Linear(in_features=in_feature // 2, out_features=output_size)

    def forward(self, x):
        out = self.embedded(x)                # (batch, seq_len, embedding_size)
        out = self.embedding_dropout(out)
        out = torch.transpose(out, 1, 2)      # (batch, embedding_size, seq_len)
        # Explicit version:
        # out1 = self.conv1(out)
        # out2 = self.conv2(out)
        # out3 = self.conv3(out)
        # out1 = F.max_pool1d(out1, kernel_size=out1.size(2))
        # out2 = F.max_pool1d(out2, kernel_size=out2.size(2))
        # out3 = F.max_pool1d(out3, kernel_size=out3.size(2))
        # out = torch.cat((out1, out2, out3), 1).squeeze(2)
        out = [conv(out) for conv in self.convs]                            # each: (batch, kernel_num, Lout)
        out = [F.max_pool1d(one, kernel_size=one.size(2)) for one in out]   # each: (batch, kernel_num, 1)
        # With the example inputs below, each tensor above is (5, 2, 1); after cat the shape is (5, 6, 1).
        out = torch.cat(out, dim=1).squeeze(2)
        out = self.fcdropout(out)
        out = self.linear1(F.relu(out))
        out = self.linear2(F.relu(out))
        return out


model = CNNlayer(3000, 3, 2, [2, 3, 4], 2)
# model = CNNlayer(vocab_size=8000, embedding_size=100, kernel_num=2, kernel_Size=[2, 3, 4], output_size=2)
# inputs = torch.rand(5, 4)   # would not work: nn.Embedding expects integer indices
inputs = np.array(
    [[1, 2, 3, 4],
     [2, 2, 3, 4],
     [3, 2, 3, 4],
     [4, 2, 3, 4],
     [5, 2, 3, 4]]
)
inputs = torch.from_numpy(inputs)
pred = model(inputs)
print(pred)
Stepping through with the debugger shows how the dimensions change at each stage; a hook-based alternative is sketched below.
P.S. The loss still oscillates quite a lot on the validation and test sets, and the results are not great; I have not figured out where the problem is yet.
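A small sketch (assuming the model and inputs defined above) that registers forward hooks to print each layer's output shape instead of using the debugger:

def print_shape(name):
    def hook(module, inp, out):
        print(name, tuple(out.shape))
    return hook

model.embedded.register_forward_hook(print_shape("embedding"))
for i, conv in enumerate(model.convs):
    conv.register_forward_hook(print_shape(f"conv[{i}]"))
model.linear1.register_forward_hook(print_shape("linear1"))
model.linear2.register_forward_hook(print_shape("linear2"))
model(inputs)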
- Supplement:
torch.cat
import torch
import torch.nn as nn
# Conv1d on text: the embedding dimension is the channel dimension.
# conv1 = nn.Conv1d(in_channels=100, out_channels=2, kernel_size=2)
# input = torch.randn(32, 35, 100)
# # batch_size x text_len x embedding_size -> batch_size x embedding_size x text_len
# input = torch.transpose(input, 1, 2)
# print(input.size())
# out = conv1(input)
# print(out.size())
#
# print("--------conv2d------")
#
# Conv2d on text: a single input channel with kernel width = embedding size,
# so the input needs an extra channel dimension (N, 1, text_len, embedding_size).
# conv2 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(2, 100))
# input2 = torch.randn(32, 35, 100).unsqueeze(1)
# out2 = conv2(input2)
# print(out2.size())
# # 100 is in_channels (the embedding dimension), 2 is out_channels (how many kernels), 3 is the kernel size.
# m = nn.Conv1d(100, 2, 3, stride=1)
# # Input: N*C*L -> batch_size 20, C = embedding dimension, L = 44.
# # Output: N*Cout*Lout -> Cout is out_channels.
# input2 = torch.randn(20, 100, 44)
# output2 = m(input2)
# print(output2.size())
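For reference, a runnable sketch of the point the commented lines above make (sizes taken from that example): treating the embedding dimension as Conv1d channels gives the same output shape as a single-channel Conv2d whose kernel width equals the embedding size.

x = torch.randn(32, 35, 100)                      # (batch, text_len, embedding_size)
conv1d = nn.Conv1d(in_channels=100, out_channels=2, kernel_size=2)
out1 = conv1d(x.transpose(1, 2))                  # -> (32, 2, 34)
conv2d = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(2, 100))
out2 = conv2d(x.unsqueeze(1))                     # -> (32, 2, 34, 1)
print(out1.shape, out2.squeeze(3).shape)          # both torch.Size([32, 2, 34])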
#####torch.cat########
# Example from the official docs
x = torch.randn(2, 3)
print(x)
print(torch.cat((x, x, x), 0))  # concatenate along dim 0 -> shape (6, 3)
print(torch.cat((x, x, x), 1))  # concatenate along dim 1 -> shape (2, 9)
# Example of the concatenation step in the CNN above
tensor1 = torch.tensor([[[1.4487],
                         [1.1707]],
                        [[0.2971],
                         [0.2723]],
                        [[0.4521],
                         [0.0147]],
                        [[1.0595],
                         [0.6224]],
                        [[1.7661],
                         [0.6258]]])
tensor2 = torch.tensor([[[0.0000],
                         [0.7214]],
                        [[0.1606],
                         [0.4626]],
                        [[0.0905],
                         [1.1093]],
                        [[0.7382],
                         [0.3790]],
                        [[0.4005],
                         [1.5363]]])
tensor3 = torch.tensor([[[0.0000],
                         [0.4021]],
                        [[0.5498],
                         [0.3947]],
                        [[0.7512],
                         [1.2237]],
                        [[0.0000],
                         [0.5397]],
                        [[0.4449],
                         [0.0000]]])
tensor = [tensor1, tensor2, tensor3]
print(tensor1.shape)
print(tensor2.shape)
print(tensor3.shape)
print(tensor1)
# Each tensor has shape (5, 2, 1); concatenating along dim 1 gives (5, 6, 1).
out = torch.cat(tensor, dim=1)
print(out.shape)
print(out)
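In the model above, this (5, 6, 1) tensor is then squeezed on dim 2, giving one 6-dimensional feature vector per sentence for the fully connected layers:

features = out.squeeze(2)   # (5, 6, 1) -> (5, 6)
print(features.shape)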