美文网首页推荐系统
推荐系统9:MF推荐

推荐系统9:MF推荐

作者: 勇于自信 | 来源:发表于2020-02-26 17:54 被阅读0次
    1.LFM推荐

    思路和ALS算法类似,区别在于,ALS利用坐标下降法,LFM利用梯度下降法
    假设: 评分矩阵𝑅𝑚,𝑛,m个用户对n个物品评分
    𝑟_{𝑢,𝑖}:用户u对物品i的评分
    𝑅𝑚,𝑛 = 𝑃𝑚,𝐹 ∙ 𝑄𝐹,𝑛:R是两个矩阵的乘积
    P:每一行代表一个用户对各隐因子的喜欢程度
    Q:每一列代表一个物品在各个隐因子上的概率分布
    \hat r_{ui}=\sum _{f=1}^FP_{uf}Q_{fi}
    \hat r_{ui}尽可能和r_{ui}相近,min:Loss=\sum_{r_{ui}\neq 0}(r_{ui}-\hat r_{ui})^2
    防止过拟合,加入正则项:
    \lambda(\sum P^2_{uf}+\sum Q^2_{fi})
    即损失函数为:
    min:Loss=\sum_{r_{ui}\neq 0}(r_{ui}-\hat r_{ui})^2+\lambda(\sum P^2_{uf}+\sum Q^2_{fi})=f(P,Q)
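    采用梯度下降法,在t+1轮迭代中,P和Q的值分别是
    P^{(t+1)}=P^{(t)}-\alpha\frac{\partial Loss}{\partial P^{(t)}}
    Q^{(t+1)}=Q^{(t)}-\alpha\frac{\partial Loss}{\partial Q^{(t)}}
    其中\alpha为学习率(步长)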




    随机梯度下降并没有严密的理论证明,但实践经验表明,它通常比传统梯度下降法需要更少的迭代次数就可以收敛
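    传统梯度:
    \frac{\partial Loss}{\partial P_{uf}}=\sum_{i:r_{ui}\neq 0}-2(r_{ui}-\hat r_{ui})Q_{fi}+2\lambda P_{uf}
    随机梯度:
    \frac{\partial Loss}{\partial P_{uf}}=-2(r_{ui}-\hat r_{ui})Q_{fi}+2\lambda P_{uf}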


    计算时,只利用用户u对一个物品的评分,而不是利用用户u的所有评分
    LFM推荐demo:
    # coding:utf-8
    
    import random
    import math
    
    try:
        xrange
    except NameError:
        # Python 3 compat
        xrange = range
    
    class LFM(object):
        """Latent factor model (plain matrix factorization) trained with SGD.

        Every user ``u`` and every item ``i`` gets a vector of ``F`` latent
        factors (``P[u]`` and ``Q[i]``); the predicted rating is their dot
        product.
        """

        def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
            """Store the hyper-parameters and randomly initialise P and Q.

            Args:
                rating_data: list of ``(user, [(item, rate), ...])`` pairs.
                F: number of latent factors per user / item.
                alpha: initial SGD learning rate.
                lmbd: L2 regularisation coefficient.
                max_iter: number of passes over ``rating_data`` made by
                    ``train``.
            """
            self.F = F
            self.P = dict()
            self.Q = dict()
            self.alpha = alpha
            self.lmbd = lmbd
            self.max_iter = max_iter
            self.rating_data = rating_data

            # Randomly initialise the factor matrices P (users) and Q (items).
            for user, rates in self.rating_data:
                self.P[user] = self._random_factors()
                for item, _ in rates:
                    if item not in self.Q:
                        self.Q[item] = self._random_factors()

        def _random_factors(self):
            """Return F values drawn uniformly from [0, 1/sqrt(F))."""
            return [random.random() / math.sqrt(self.F) for _ in range(self.F)]

        def train(self):
            """Fit P and Q with stochastic gradient descent.

            Makes ``max_iter`` passes over the rating data. The learning rate
            starts at ``self.alpha`` and is multiplied by 0.9 after each pass.
            The decay is applied to a local copy, so ``self.alpha`` keeps the
            configured value and ``train`` can be called again.
            """
            alpha = self.alpha
            for _ in range(self.max_iter):
                for user, rates in self.rating_data:
                    p_u = self.P[user]
                    for item, rui in rates:
                        q_i = self.Q[item]
                        err_ui = rui - self.predict(user, item)
                        for f in range(self.F):
                            # Read both factors before writing either one, so
                            # the Q step uses the same P value as the P step.
                            p_uf, q_if = p_u[f], q_i[f]
                            p_u[f] += alpha * (err_ui * q_if - self.lmbd * p_uf)
                            q_i[f] += alpha * (err_ui * p_uf - self.lmbd * q_if)
                alpha *= 0.9  # shrink the step size after every pass

        def predict(self, user, item):
            """Return the predicted rating of ``user`` for ``item``.

            Raises:
                KeyError: if ``user`` or ``item`` has no factor vector.
            """
            p_u = self.P[user]
            q_i = self.Q[item]
            return sum(p_u[f] * q_i[f] for f in range(self.F))
    
    
    if __name__ == '__main__':
        # Toy data set: users A, B, C and items a, b, c, d.
        rate_A = [('a', 1.0), ('b', 1.0)]
        rate_B = [('b', 1.0), ('c', 1.0)]
        rate_C = [('c', 1.0), ('d', 1.0)]
        rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

        lfm = LFM(rating_data, 2)
        lfm.train()

        # Print user A's predicted preference for every item.
        for item in 'abcd':
            print(item, lfm.predict('A', item))
    

    运行代码,推荐结果:
    a 0.6772442553573618
    b 0.7600624403943927
    c 0.9328792453570258
    d 0.7089159198323267

    2.SVD推荐

    LFM没有考虑客观的“偏置”,所以带偏置的LFM称为SVD
    偏置:事件固有的,不受外界影响的属性
    \hat r_{ui}=\mu+b_u+b_i+\sum_{f=1}^FP_{uf}Q_{fi}



    • 𝜇:训练集中所有评分的平均值
    • 𝑏𝑢:用户偏置,代表一个用户评分的平均值
    • 𝑏𝑖:物品偏置,代表一个物品被评分的平均值
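    更新方法(记e_{ui}=r_{ui}-\hat r_{ui}):
    b_u\leftarrow b_u+\alpha(e_{ui}-\lambda b_u)
    b_i\leftarrow b_i+\alpha(e_{ui}-\lambda b_i)
    P_{uf}\leftarrow P_{uf}+\alpha(e_{ui}Q_{fi}-\lambda P_{uf})
    Q_{fi}\leftarrow Q_{fi}+\alpha(e_{ui}P_{uf}-\lambda Q_{fi})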

    SVD实现demo
    # coding:utf-8
    __author__ = "orisun"
    
    import random
    import math
    
    try:
        xrange
    except NameError:
        # Python 3 compat
        xrange = range
    
    class BiasLFM(object):
        """Latent factor model with bias terms, trained with SGD.

        The predicted rating is ``mu + bu[user] + bi[item] + P[user] . Q[item]``
        where ``mu`` is the mean of all training ratings.
        """

        def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
            """Store the hyper-parameters and initialise all model parameters.

            Args:
                rating_data: list of ``(user, [(item, rate), ...])`` pairs.
                F: number of latent factors per user / item.
                alpha: initial SGD learning rate.
                lmbd: L2 regularisation coefficient.
                max_iter: number of passes over ``rating_data`` made by
                    ``train``.
            """
            self.F = F
            self.P = dict()
            self.Q = dict()
            self.bu = dict()
            self.bi = dict()
            self.alpha = alpha
            self.lmbd = lmbd
            self.max_iter = max_iter
            self.rating_data = rating_data
            self.mu = 0.0

            # Randomly initialise P and Q, zero the biases, and accumulate the
            # global rating mean in the same pass.
            rating_count = 0
            rating_sum = 0.0
            for user, rates in self.rating_data:
                self.P[user] = self._random_factors()
                self.bu[user] = 0.0
                rating_count += len(rates)
                for item, rate in rates:
                    rating_sum += rate
                    if item not in self.Q:
                        self.Q[item] = self._random_factors()
                    self.bi[item] = 0.0
            # With no ratings at all the mean is undefined; keep mu at 0.0
            # instead of dividing by zero.
            if rating_count:
                self.mu = rating_sum / rating_count

        def _random_factors(self):
            """Return F values drawn uniformly from [0, 1/sqrt(F))."""
            return [random.random() / math.sqrt(self.F) for _ in range(self.F)]

        def train(self):
            """Fit bu, bi, P and Q with stochastic gradient descent.

            Makes ``max_iter`` passes over the rating data. The learning rate
            starts at ``self.alpha`` and is multiplied by 0.9 after each pass.
            The decay is applied to a local copy, so ``self.alpha`` keeps the
            configured value and ``train`` can be called again.
            """
            alpha = self.alpha
            lmbd = self.lmbd
            for _ in range(self.max_iter):
                for user, rates in self.rating_data:
                    p_u = self.P[user]
                    for item, rui in rates:
                        q_i = self.Q[item]
                        err_ui = rui - self.predict(user, item)
                        self.bu[user] += alpha * (err_ui - lmbd * self.bu[user])
                        self.bi[item] += alpha * (err_ui - lmbd * self.bi[item])
                        for f in range(self.F):
                            # Read both factors before writing either one, so
                            # the Q step uses the same P value as the P step.
                            p_uf, q_if = p_u[f], q_i[f]
                            p_u[f] += alpha * (err_ui * q_if - lmbd * p_uf)
                            q_i[f] += alpha * (err_ui * p_uf - lmbd * q_if)
                alpha *= 0.9  # shrink the step size after every pass

        def predict(self, user, item):
            """Return the predicted rating of ``user`` for ``item``.

            Raises:
                KeyError: if ``user`` or ``item`` has no learned parameters.
            """
            p_u = self.P[user]
            q_i = self.Q[item]
            dot = sum(p_u[f] * q_i[f] for f in range(self.F))
            return dot + self.bu[user] + self.bi[item] + self.mu
    
    
    if __name__ == '__main__':
        # Toy data set: users A, B, C and items a, b, c, d.
        rate_A = [('a', 1.0), ('b', 1.0)]
        rate_B = [('b', 1.0), ('c', 1.0)]
        rate_C = [('c', 1.0), ('d', 1.0)]
        rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

        lfm = BiasLFM(rating_data, 2)
        lfm.train()

        # Print user A's predicted preference for every item.
        for item in 'abcd':
            print(item, lfm.predict('A', item))
    

    运行代码,推荐结果:
    a 1.0112206656603693
    b 0.9885043037157129
    c 0.9868790391421494
    d 1.00612285421106

    3.SVD++推荐

    SVD++:任何用户只要对物品i有过评分,无论评分多少,已经在一定程度上反映了用户对各个隐因子的喜好程度𝑦𝑖 = (𝑦𝑖1, 𝑦𝑖2, … , 𝑦𝑖𝐹),y是物品携带的属性
    \hat r_{ui}=\mu+b_u+b_i+\sum_{f=1}^F(P_{uf}+\frac{1}{\sqrt{|N(u)|}}\sum_{j\in N(u)}y_{jf})Q_{fi}





    • 𝑁(𝑢):用户u评价过的物品集合
    • 𝑏𝑢:用户偏置,代表一个用户评分的平均值
    • 𝑏𝑖:物品偏置,代表一个物品被评分的平均值

    SVD++推荐demo
    # coding:utf-8
    __author__ = "orisun"
    
    import random
    import math
    
    try:
        xrange
    except NameError:
        # Python 3 compat
        xrange = range
    
    class SVDPP(object):
        """SVD++ model: biased matrix factorization plus implicit feedback.

        On top of the bias terms and the explicit user factors ``P[user]``,
        every item ``j`` carries a vector ``Y[j]``; the sum of ``Y[j]`` over
        the items a user has rated, scaled by ``1/sqrt(number of rated
        items)``, is added to the user's factors when predicting.
        """

        def __init__(self, rating_data, F, alpha=0.1, lmbd=0.1, max_iter=500):
            """Store the hyper-parameters and initialise all model parameters.

            Args:
                rating_data: list of ``(user, [(item, rate), ...])`` pairs.
                F: number of latent factors per user / item.
                alpha: initial SGD learning rate.
                lmbd: L2 regularisation coefficient.
                max_iter: number of passes over ``rating_data`` made by
                    ``train``.
            """
            self.F = F
            self.P = dict()
            self.Q = dict()
            self.Y = dict()
            self.bu = dict()
            self.bi = dict()
            self.alpha = alpha
            self.lmbd = lmbd
            self.max_iter = max_iter
            self.rating_data = rating_data
            self.mu = 0.0

            # Randomly initialise P, Q and Y, zero the biases, and accumulate
            # the global rating mean in the same pass.
            rating_count = 0
            rating_sum = 0.0
            for user, rates in self.rating_data:
                self.P[user] = self._random_factors()
                self.bu[user] = 0.0
                rating_count += len(rates)
                for item, rate in rates:
                    rating_sum += rate
                    if item not in self.Q:
                        self.Q[item] = self._random_factors()
                    if item not in self.Y:
                        self.Y[item] = self._random_factors()
                    self.bi[item] = 0.0
            # With no ratings at all the mean is undefined; keep mu at 0.0
            # instead of dividing by zero.
            if rating_count:
                self.mu = rating_sum / rating_count

        def _random_factors(self):
            """Return F values drawn uniformly from [0, 1/sqrt(F))."""
            return [random.random() / math.sqrt(self.F) for _ in range(self.F)]

        def _implicit_factors(self, ratedItems):
            """Return sum(Y[j] for j in ratedItems) / sqrt(len(ratedItems)).

            An empty ``ratedItems`` yields a zero vector, so there is no
            division by zero.
            """
            total = [0.0] * self.F
            n_rated = len(ratedItems)
            if n_rated == 0:
                return total
            for rated_item, _ in ratedItems:
                y_j = self.Y[rated_item]
                for f in range(self.F):
                    total[f] += y_j[f]
            norm = math.sqrt(1.0 * n_rated)
            return [value / norm for value in total]

        def _score(self, user, item, implicit):
            """Return the prediction given a precomputed implicit vector."""
            p_u = self.P[user]
            q_i = self.Q[item]
            dot = sum((p_u[f] + implicit[f]) * q_i[f] for f in range(self.F))
            return dot + self.bu[user] + self.bi[item] + self.mu

        def train(self):
            """Fit bu, bi, P, Q and Y with stochastic gradient descent.

            Makes ``max_iter`` passes over the rating data. bu, bi, P and Q are
            updated after every rating; Y is updated once per user, after all
            of that user's ratings were processed. Users without ratings are
            skipped. The learning rate starts at ``self.alpha`` and is
            multiplied by 0.9 after each pass; the decay is applied to a local
            copy, so ``self.alpha`` keeps the configured value.
            """
            alpha = self.alpha
            lmbd = self.lmbd
            for _ in range(self.max_iter):
                for user, rates in self.rating_data:
                    n_rated = len(rates)
                    if n_rated == 0:
                        # Nothing to learn from, and 1/sqrt(0) is undefined.
                        continue
                    ru = 1.0 / math.sqrt(1.0 * n_rated)
                    # Y does not change inside the rating loop below, so the
                    # implicit vector is computed once per user.
                    implicit = self._implicit_factors(rates)
                    # Accumulates err * Q[item] over the user's ratings; this
                    # feeds the Y update after the loop.
                    s = [0.0] * self.F
                    p_u = self.P[user]
                    for item, rui in rates:
                        q_i = self.Q[item]
                        err_ui = rui - self._score(user, item, implicit)
                        self.bu[user] += alpha * (err_ui - lmbd * self.bu[user])
                        self.bi[item] += alpha * (err_ui - lmbd * self.bi[item])
                        for f in range(self.F):
                            # Read both factors before writing either one, so
                            # the Q step uses the same P value as the P step.
                            p_uf, q_if = p_u[f], q_i[f]
                            s[f] += q_if * err_ui
                            p_u[f] += alpha * (err_ui * q_if - lmbd * p_uf)
                            q_i[f] += alpha * (
                                err_ui * (p_uf + implicit[f]) - lmbd * q_if)
                    for item, _ in rates:
                        y_j = self.Y[item]
                        for f in range(self.F):
                            y_j[f] += alpha * (s[f] * ru - lmbd * y_j[f])
                alpha *= 0.9  # shrink the step size after every pass

        def predict(self, user, item, ratedItems):
            """Return the predicted rating of ``user`` for ``item``.

            Args:
                user: user key present in the training data.
                item: item key present in the training data.
                ratedItems: the ``(item, rate)`` pairs the user has rated; only
                    the item keys are used. May be empty, in which case the
                    implicit-feedback term is zero.

            Raises:
                KeyError: if ``user``, ``item`` or a rated item has no learned
                    parameters.
            """
            return self._score(user, item, self._implicit_factors(ratedItems))
    
    
    if __name__ == '__main__':
        # Toy data set: users A, B, C and items a, b, c, d.
        rate_A = [('a', 1.0), ('b', 1.0)]
        rate_B = [('b', 1.0), ('c', 1.0)]
        rate_C = [('c', 1.0), ('d', 1.0)]
        rating_data = [('A', rate_A), ('B', rate_B), ('C', rate_C)]

        lfm = SVDPP(rating_data, 2)
        lfm.train()

        # Print user A's predicted preference for every item; the model also
        # needs the list of items A has already rated.
        for item in 'abcd':
            print(item, lfm.predict('A', item, rate_A))
    

    运行代码,推荐结果:
    a 1.0006164975499188
    b 0.994332724556376
    c 1.0139922754595898
    d 0.9916958194602059

    相关文章

      网友评论

        本文标题:推荐系统9:MF推荐

        本文链接:https://www.haomeiwen.com/subject/haipchtx.html