GitHub: CapsE
Paper: A Capsule Network-based Embedding Model for Knowledge Graph Completion and Search Personalization
Model overview: pre-trained entity and relation vectors serve as the model's Embedding layer, and the triples that hold are scored so that their scores are pushed toward 1. Scoring is done by a capsule network, whose first layer is a convolution and whose second layer is a fully connected capsule layer.
A capsule network is a refinement of the convolutional neural network that uses a routing algorithm internally.
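For background, capsule outputs are typically passed through the "squash" nonlinearity from Sabour et al.'s dynamic-routing paper, which rescales a vector's length into (0, 1) while preserving its direction. A minimal NumPy sketch for reference (background material only, not code from this repository):

import numpy as np

def squash(v, eps=1e-9):
    # s = (||v||^2 / (1 + ||v||^2)) * v / ||v||
    # Short vectors shrink toward 0; long vectors approach unit length.
    sq_norm = np.sum(np.square(v), axis=-1, keepdims=True)
    return (sq_norm / (1.0 + sq_norm)) * v / np.sqrt(sq_norm + eps)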
1. Data preprocessing (CapsE.py)
# Load the training / validation / test datasets
# train: training set; triples that hold, labeled 1
# valid: validation set; triples that hold, labeled 1
# test: test set; triples that hold, labeled 1
# words_indexes / indexes_words: mutually inverse maps that index entities and relations in one shared table
# headTailSelector: head/tail entity selector (used for negative sampling)
# entity2id / id2entity: mutually inverse entity <-> id maps (entities only)
# relation2id / id2relation: mutually inverse relation <-> id maps (relations only)
train, valid, test, words_indexes, indexes_words, headTailSelector, entity2id, id2entity, relation2id, id2relation = build_data(path=args.data, name=args.name)
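Judging from how valid and test are consumed below (dict keys cast to an int32 array of shape (N, 3), dict values to float labels), the returned datasets are presumably dicts keyed by index triples. An illustrative, hypothetical picture of the structures:

# Hypothetical illustration only -- not actual repository data:
# train = {(head_idx, rel_idx, tail_idx): 1.0, ...}    # observed triples, all labeled 1
# words_indexes = {entity_or_relation_name: row_index}  # entities and relations share one index space
# entity2id = {entity_name: entity_id}; relation2id = {relation_name: relation_id}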
# Size of the training set
data_size = len(train)
# Batch generator for the training set (negative triples are sampled according to neg_ratio)
train_batch = Batch_Loader(train, words_indexes, indexes_words, headTailSelector, entity2id, id2entity, relation2id, id2relation, batch_size=args.batch_size, neg_ratio=args.neg_ratio)
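The headTailSelector argument suggests Bernoulli-style negative sampling, where a corrupted triple replaces either the head or the tail with a per-relation probability (as in TransH/ConvKB). A hedged sketch of one such corruption step; the names and the assumption that headTailSelector[r] holds a tail-corruption probability are illustrative, not the repo's actual Batch_Loader internals:

import numpy as np

def corrupt_triple(h, r, t, entity_array, headTailSelector, rng=np.random):
    # Assumption: headTailSelector[r] is the probability (in [0, 1]) of
    # corrupting the tail rather than the head for relation r.
    if rng.random_sample() < headTailSelector[r]:
        return h, r, rng.choice(entity_array)   # corrupt the tail
    else:
        return rng.choice(entity_array), r, t   # corrupt the head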
# Extract the entity ids tracked by the batch loader into the array entity_array
entity_array = np.array(list(train_batch.indexes_ents.keys()))
# The validation triples become x_valid and their label 1 becomes y_valid; training pushes the scores of these valid triples toward 1
x_valid = np.array(list(valid.keys())).astype(np.int32)
y_valid = np.array(list(valid.values())).astype(np.float32)
# The test triples become x_test and their label 1 becomes y_test; the scores of these valid triples should likewise approach 1
x_test = np.array(list(test.keys())).astype(np.int32)
y_test = np.array(list(test.values())).astype(np.float32)
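Each row of x_valid / x_test is therefore one (head, relation, tail) index triple with a scalar label; a quick illustrative sanity check:

# Illustrative check: triples have 3 columns and one label per row
assert x_valid.shape[1] == 3
assert y_valid.shape[0] == x_valid.shape[0]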
2. Initializing the entity/relation Embedding (CapsE.py)
initialization merges the pre-trained entity vectors and relation vectors into a single Embedding table (in CapsE, entity vectors and relation vectors live in one table).
# Build the mixed entity-and-relation Embedding matrix, i.e. the initialization matrix
initialization = []
print("Using initialization.")
# Allocate a matrix of shape [len(words_indexes), embedding_dim], where len(words_indexes) = len(entities) + len(relations); note that np.empty leaves it uninitialized (not zero-filled), and every row is overwritten below
initialization = np.empty([len(words_indexes), args.embedding_dim]).astype(np.float32)
# Load the pre-trained entity and relation vectors
initEnt, initRel = init_norm_Vector(args.data + args.name + '/relation2vec' + str(args.embedding_dim) + '.init',
args.data + args.name + '/entity2vec' + str(args.embedding_dim) + '.init', args.embedding_dim)
# Merge the entity and relation vectors into one Embedding matrix (initialization), following the words_indexes mapping
for _word in words_indexes:
    if _word in relation2id:
        index = relation2id[_word]
        _ind = words_indexes[_word]
        initialization[_ind] = initRel[index]
    elif _word in entity2id:
        index = entity2id[_word]
        _ind = words_indexes[_word]
        initialization[_ind] = initEnt[index]
    else:
        print('*****************Error********************!')
        break
# initialization now holds the merged matrix of pre-trained entity and relation vectors
initialization = np.array(initialization, dtype=np.float32)
(Figure: the pre-trained entity and relation vectors)
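init_norm_Vector itself is not shown in this walkthrough. Given the file naming and the "norm" in its name, a plausible implementation reads one whitespace-separated vector per line and L2-normalizes each row; a sketch under those assumptions, not the repository's exact code (note the call above passes the relation file first yet receives the entity vectors first):

import numpy as np

def init_norm_Vector(relinit, entinit, embedding_size):
    # Load pre-trained vectors (one per line) and L2-normalize each row.
    def load(path):
        vectors = []
        with open(path) as f:
            for line in f:
                vec = np.array([float(x) for x in line.strip().split()])
                assert len(vec) == embedding_size
                vectors.append(vec / np.linalg.norm(vec))
        return np.array(vectors, dtype=np.float32)
    # Entities are returned first, matching "initEnt, initRel = ..." above
    return load(entinit), load(relinit)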
3. Building the training model with TensorFlow (capsuleNet.py)
import tensorflow as tf
from capsuleLayer import CapsLayer
import math
epsilon = 1e-9
class CapsE(object):
    def __init__(self, sequence_length, embedding_size, num_filters, vocab_size, iter_routing, batch_size=256,
                 num_outputs_secondCaps=1, vec_len_secondCaps=10, initialization=[], filter_size=1, useConstantInit=False):
        # Placeholders for input, output
        # Declare the model's input placeholders and record its hyperparameters
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")  # shape=(256,3)
        self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")  # shape=(256,1)
        self.filter_size = filter_size  # 1
        self.num_filters = num_filters  # 400
        self.sequence_length = sequence_length  # 3
        self.embedding_size = embedding_size  # 100
        self.iter_routing = iter_routing  # 1
        self.num_outputs_secondCaps = num_outputs_secondCaps  # 1
        self.vec_len_secondCaps = vec_len_secondCaps  # 10
        self.batch_size = batch_size  # 256
        self.useConstantInit = useConstantInit  # False
        # Embedding layer: either randomly initialized or loaded from pre-trained embeddings
        with tf.name_scope("embedding"):
            if initialization == []:  # an empty list means no pre-trained embeddings were supplied
                self.W = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -math.sqrt(1.0 / embedding_size),
                                      math.sqrt(1.0 / embedding_size), seed=1234), name="W")
            else:
                self.W = tf.get_variable(name="W2", initializer=initialization)  # shape=(40954,100)
            # Look up the input triples in the embedding table, producing (batch_size, 3, dim)
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)  # shape=(256,3,100)
            # Add a channel dimension, giving (batch_size, 3, dim, 1) for the later Conv2D feature extraction
            self.X = tf.expand_dims(self.embedded_chars, -1)  # shape=(256,3,100,1)

        # Build the capsule layers
        self.build_arch()
        # Build the model's loss
        self.loss()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)
        tf.logging.info('Setting up the main structure')
    def build_arch(self):
        # The first capsule layer
        with tf.variable_scope('FirstCaps_layer'):
            self.firstCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                       with_routing=False, layer_type='CONV', embedding_size=self.embedding_size,
                                       batch_size=self.batch_size, iter_routing=self.iter_routing,
                                       useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                       num_filters=self.num_filters, sequence_length=self.sequence_length)
            self.caps1 = self.firstCaps(self.X, kernel_size=1, stride=1)  # shape=(256,100,400,1)
        # The second capsule layer
        with tf.variable_scope('SecondCaps_layer'):
            self.secondCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                        with_routing=True, layer_type='FC',
                                        batch_size=self.batch_size, iter_routing=self.iter_routing,
                                        embedding_size=self.embedding_size, useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                        num_filters=self.num_filters, sequence_length=self.sequence_length)
            self.caps2 = self.secondCaps(self.caps1)  # shape=(256,1,10,1)
        # The score is the Euclidean length of the second capsule's output vector
        self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True) + epsilon)  # shape=(256,1,1,1)
    def loss(self):
        self.scores = tf.reshape(self.v_length, [self.batch_size, 1])  # shape=(256,1)
        self.predictions = tf.nn.sigmoid(self.scores)
        print("Using square softplus loss")
        # Note: this is not the usual cross-entropy loss
        losses = tf.square(tf.nn.softplus(self.scores * self.input_y))
        self.total_loss = tf.reduce_mean(losses)
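Written out: with s_i the capsule-length score of triple i (always non-negative, since it is a vector norm) and y_i its label (1 for observed triples per the preprocessing above; the sampled negatives presumably carry -1), the quantity minimized is

L = (1/B) * Σ_i softplus(s_i · y_i)²,   where softplus(x) = log(1 + eˣ)

which, as the comment notes, is a squared softplus rather than the usual cross-entropy.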
4. Training the model in batches (CapsE.py)
with tf.Graph().as_default():
    # Graph/session configuration
    session_conf = tf.ConfigProto(allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)  # create the session with the configuration above
    with sess.as_default():
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # Build the model; batch_size is 2*args.batch_size, presumably because each
        # batch carries the sampled negative triples alongside the positives
        capse = CapsE(sequence_length=x_valid.shape[1],
                      initialization=initialization,
                      embedding_size=args.embedding_dim,
                      filter_size=args.filter_size,
                      num_filters=args.num_filters,
                      vocab_size=len(words_indexes),
                      iter_routing=args.iter_routing,
                      batch_size=2 * args.batch_size,
                      num_outputs_secondCaps=args.num_outputs_secondCaps,
                      vec_len_secondCaps=args.vec_len_secondCaps,
                      useConstantInit=args.useConstantInit
                      )

        # Define the training procedure: Adam as the optimizer,
        # updating the parameters from the gradients of the loss
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        grads_and_vars = optimizer.compute_gradients(capse.total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Checkpoint directory
        out_dir = os.path.abspath(os.path.join(args.run_folder, "runs_CapsE", args.model_name))
        print("Writing to {}\n".format(out_dir))
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                capse.input_x: x_batch,
                capse.input_y: y_batch
            }
            _, step, loss = sess.run([train_op, global_step, capse.total_loss], feed_dict)
            return loss

        # Train in batches
        num_batches_per_epoch = int((data_size - 1) / args.batch_size) + 1
        for epoch in range(args.num_epochs):
            for batch_num in range(num_batches_per_epoch):
                x_batch, y_batch = train_batch()
                loss = train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                #print(loss)
            if epoch > 0:
                if epoch % args.savedEpochs == 0:
                    path = capse.saver.save(sess, checkpoint_prefix, global_step=epoch)
                    print("Saved model checkpoint to {}\n".format(path))