在推荐系统中,各种各样的特征里少不了Categorical Features,而它们往往要被转换成Embedding,才能被神经网络处理。通常,我们习惯给每个feature的每个value安排一个统一的embedding size。今天,我们来看一篇给不同value不一样embedding size的文章, Mixed Dimension Embeddings with Application to Memory-Efficient Recommendation Systems,源自Facebook, 相关链接:
问题与模型
- 基于矩阵分解的评分预测
- 基于DLRM的CTR预估
Mixed Dimensions
该方法很简单,可直观理解为将原来的embedding矩阵进行按行分组,每组确定一个embedding size,最后再用投影矩阵将各组投回一样的embedding size
1 如何分组
MF分组方案:按行分组;CTR分组方案:每个Feature自成一组。
2 如何给每组确定embedding size
每组的embedding size是按popularity(流行度)给出的。
Popularity Based Dimension Selection
Code Snippet
以下是从GitHub官方实现中找出的、个人认为有助于理解论文的代码片段。
def md_solver(n, alpha, d0=None, B=None, round_dim=True, k=None):
    """
    Mixed-dimension assignment via the alpha power temperature heuristic.

    n         -- (torch.LongTensor) number of rows of each embedding matrix
    alpha     -- (torch.FloatTensor) non-negative scalar controlling dim. skew
    d0        -- (torch.FloatTensor) scalar baseline embedding dimension
    B         -- (torch.FloatTensor) scalar parameter budget for the embedding layer
    round_dim -- (bool) round each dimension to the nearest power of 2
    k         -- (torch.LongTensor) average number of queries per inference

    Returns the per-matrix embedding dimensions, ordered by ascending n.
    """
    # Sort row counts ascending; reorder the query counts to match.
    sorted_n, order = torch.sort(n)
    queries = torch.ones(len(sorted_n)) if k is None else k[order]
    # Scale rows by query frequency before applying the power rule.
    dims = alpha_power_rule(sorted_n.type(torch.float) / queries, alpha, d0=d0, B=B)
    return pow_2_round(dims) if round_dim else dims
# Compute a per-feature embedding dimension from each feature's cardinality.
m_spa = md_solver(
    n=torch.tensor(ln_emb),  # num_embeddings per block: number of values of each feature
    alpha=args.md_temperature,  # alpha: controls how skewed the assigned dims are
    d0=m_spa,  # baseline embedding dimension (the uniform size used before)
    B=None,  # no explicit parameter budget; lambda is derived from d0 instead
    round_dim=args.md_round_dims,  # whether to round dims to the nearest power of 2
    k=None  # no per-feature query counts; md_solver then assumes 1 per block
).tolist()
给每个block分配embedding_size
def alpha_power_rule(n, alpha, d0=None, B=None):
    """
    Assign a dimension to each block with the alpha power rule
    d_i = lambda * n_i^(-alpha), where lambda is fixed either by the
    baseline dimension d0 of the first (smallest) block, or by a total
    parameter budget B.

    n     -- vector of (possibly scaled) row counts, sorted ascending
    alpha -- non-negative scalar controlling the skew of the dims
    d0    -- scalar baseline embedding dimension (takes precedence)
    B     -- scalar parameter budget, used only when d0 is None

    Returns a LongTensor of per-block dims, each at least 1; when d0 is
    given, the first entry is pinned to exactly d0.
    Raises ValueError when neither d0 nor B is provided.
    """
    counts = n.type(torch.float)
    if d0 is not None:
        lamb = d0 * (counts[0] ** alpha)
    elif B is not None:
        lamb = B / torch.sum(counts ** (1 - alpha))
    else:
        raise ValueError("Must specify either d0 or B")
    dims = torch.ones(len(n)) * lamb * (counts ** (-alpha))
    dims = torch.clamp(dims, min=1.0)  # never shrink a block below 1 dim
    if d0 is not None:
        dims[0] = d0  # smallest block keeps exactly the baseline dim
    return torch.round(dims).type(torch.long)
class PrEmbeddingBag(nn.Module):
    """An EmbeddingBag whose output is projected up to a shared base dimension.

    num_embeddings -- rows of the embedding table
    embedding_dim  -- the (possibly reduced) native dimension of this table
    base_dim       -- the common dimension all tables are mapped to;
                      must be >= embedding_dim
    """

    def __init__(self, num_embeddings, embedding_dim, base_dim):
        super(PrEmbeddingBag, self).__init__()
        self.embs = nn.EmbeddingBag(
            num_embeddings, embedding_dim, mode="sum", sparse=True)
        torch.nn.init.xavier_uniform_(self.embs.weight)
        if embedding_dim > base_dim:
            raise ValueError(
                f"Embedding dim {embedding_dim} > base dim {base_dim}"
            )
        if embedding_dim == base_dim:
            # Already at the target size: no projection needed.
            self.proj = nn.Identity()
        else:
            # Projection matrix mapping the reduced dim up to base_dim.
            self.proj = nn.Linear(embedding_dim, base_dim, bias=False)
            torch.nn.init.xavier_uniform_(self.proj.weight)

    def forward(self, input, offsets=None, per_sample_weights=None):
        pooled = self.embs(
            input, offsets=offsets, per_sample_weights=per_sample_weights)
        return self.proj(pooled)
如何调用PrEmbeddingBag
def create_emb(self, m, ln):
    # Build one PrEmbeddingBag per sparse feature, each with its own dim.
    # m  -- per-feature embedding dims (e.g. the output of md_solver)
    # ln -- vocabulary sizes per feature (uses .size, presumably a numpy
    #       array -- TODO confirm against the caller)
    # NOTE(review): this snippet appears truncated -- EE is never appended
    # to emb_l and nothing is returned; verify against the full source.
    emb_l = nn.ModuleList()  # list of the per-feature embedding modules
    for i in range(0, ln.size):
        n = ln[i]  # num_embeddings of the i-th embedding matrix
        _m = m[i]  # embedding_dim of the i-th embedding matrix
        base = max(m)  # the unified embedding_dim after projection
        EE = PrEmbeddingBag(n, _m, base)
        # Overwrite the Xavier init with uniform(-1/sqrt(n), 1/sqrt(n)).
        W = np.random.uniform(
            low=-np.sqrt(1 / n), high=np.sqrt(1 / n), size=(n, _m)
        ).astype(np.float32)
        EE.embs.weight.data = torch.tensor(W, requires_grad=True)
网友评论