美文网首页
keras dssm 算法实现

keras dssm 算法实现

作者: 光光小丸子 | 来源:发表于2018-12-28 20:37 被阅读0次

DSSM(Deep Structured Semantic Model,深度结构化语义模型)是一种常用的计算文本相似度的模型,下面给出它的 Keras 实现。

import numpy as np
from keras import backend
from keras.layers import Activation, Input
from keras.layers.core import Dense, Lambda, Reshape
from keras.layers.convolutional import Convolution1D
from keras.layers.merge import concatenate, dot
from keras.models import Model

# Hyper-parameters of the model. The "section" / "equation" references in this
# script's comments point to the paper the model follows.
LETTER_GRAM_SIZE = 3  # See section 3.2. Not used by the model code in this script.
WINDOW_SIZE = 3  # See section 3.2. Not used by the model code in this script.
TOTAL_LETTER_GRAMS = int(3 * 1e4)  # 30000. Not used by the model code in this script.
WORD_DEPTH = 100  # Size of the last (feature) axis of every model input.
K = 300  # Number of filters of the query/document convolution layers.
L = 128  # Number of units of the dense "semantic" layers.
J = 4  # Number of negative-document inputs.
FILTER_LENGTH = 1  # Kernel size of the query/document convolution layers.
# Model inputs: one query, one positive document and J negative documents.
# Every input shares the same per-sample shape (None, WORD_DEPTH); the None
# axis is left unspecified, the last axis has WORD_DEPTH features.
_sequence_shape = (None, WORD_DEPTH)
query = Input(shape=_sequence_shape)
pos_doc = Input(shape=_sequence_shape)
neg_docs = [Input(shape=_sequence_shape) for _ in range(J)]

# Query tower. These layers are created here and applied to the query only.
# Each layer is called directly on a tensor, so its input shape is inferred
# from that tensor and no input_shape / input_dim argument is needed.
# Convolution along axis 1 with K filters; padding="same" keeps the length of that axis.
query_conv = Convolution1D(K, FILTER_LENGTH, padding="same", activation="tanh")(query)

# Max over axis 1 collapses the variable-length axis into one K-dimensional vector.
query_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))(query_conv) # See section 3.4.
# Project the pooled vector to the L-dimensional semantic representation.
query_sem = Dense(L, activation="tanh")(query_max) # See section 3.5.

# Document tower. Each layer is instantiated exactly once and then applied to
# the positive document and to every negative document, so all documents are
# encoded with the same weights. The layers are called directly on tensors, so
# their input shape is inferred and no input_shape / input_dim argument is needed.
doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same", activation="tanh")

# Max over axis 1 collapses the variable-length axis into one K-dimensional vector.
doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))
# Projects the pooled vector to the L-dimensional semantic representation.
doc_sem = Dense(L, activation="tanh")

# Step 1: convolution.
pos_doc_conv = doc_conv(pos_doc)
neg_doc_convs = [doc_conv(neg_doc) for neg_doc in neg_docs]

# Step 2: max pooling.
pos_doc_max = doc_max(pos_doc_conv)
neg_doc_maxes = [doc_max(neg_doc_conv) for neg_doc_conv in neg_doc_convs]

# Step 3: semantic projection.
pos_doc_sem = doc_sem(pos_doc_max)
neg_doc_sems = [doc_sem(neg_doc_max) for neg_doc_max in neg_doc_maxes]
# See equation (4): similarity between the query vector and each document
# vector. normalize=True L2-normalises both operands before the dot product,
# which makes the result a cosine similarity.
# The positive document comes first, followed by the negatives in input order.
_similarities = [
    dot([query_sem, candidate_sem], axes=1, normalize=True)
    for candidate_sem in [pos_doc_sem] + neg_doc_sems
]
R_Q_D_p = _similarities[0]
R_Q_D_ns = _similarities[1:]

# Put the J + 1 similarities side by side (positive document first), then add
# a trailing axis of size 1 so a 1x1 convolution can be applied to them.
concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns)
concat_Rs = Reshape((J + 1, 1))(concat_Rs)

# See equation (5): every similarity is multiplied by the same scalar gamma.
# gamma is the single weight of a bias-free, linear, size-1 convolution with
# one filter; it is initialised to 1 through the `weights` argument.
# The layer is applied directly to a tensor, so its input shape is inferred
# and no input_shape argument is needed.
weight = np.array([1]).reshape(1, 1, 1)
with_gamma = Convolution1D(1, 1, padding="same", activation="linear", use_bias=False,
                           weights=[weight])(concat_Rs) # See equation (5).

# Drop the trailing axis again and turn the scaled similarities into a
# probability distribution over the J + 1 candidate documents.
with_gamma = Reshape((J + 1,))(with_gamma)
prob = Activation("softmax")(with_gamma) # See equation (5).

# Assemble the model: inputs are the query, the positive document and the J
# negative documents (in that order); the output is the softmax over the
# J + 1 candidate documents.
model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
# NOTE(review): the positive document's similarity is concatenated first, so
# training targets are presumably one-hot at index 0 — confirm against the
# training code.
model.compile(optimizer="adadelta", loss="categorical_crossentropy")

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit an extra "None" line after the table.
model.summary()

运行后输出的模型结构如下:


dssm.jpg

相关文章

网友评论

      本文标题:keras dssm 算法实现

      本文链接:https://www.haomeiwen.com/subject/qgoelqtx.html