美文网首页 > 数据挖掘
推荐系统之itemcf算法代码

推荐系统之itemcf算法代码

作者: DreamWeave_fdbe | 来源:发表于2020-02-22 23:13 被阅读0次
    #-*-coding:utf8-*-
    
    import sys 
    import math 
    import operator 
    
    import os 
    def get_user_click(rating_file):
        """Collect, per user, the items that user rated favourably.

        The file is read as comma-separated text. The first line is treated as
        a header and skipped; every other line is expected to be
        ``userid,itemid,rating,timestamp``.

        Args:
            rating_file: Path to the ratings file.

        Returns:
            A dict mapping userid (str) to a list of itemid (str) in file
            order, containing only rows whose rating is >= 3.0. An empty dict
            is returned when ``rating_file`` does not exist.
        """
        if not os.path.exists(rating_file):
            return {}
        user_click = {}
        # ``with`` guarantees the handle is closed even if a row fails to parse.
        with open(rating_file, encoding='utf-8') as fp:
            for line_no, line in enumerate(fp):
                if line_no == 0:
                    # Skip the header row.
                    continue
                fields = line.strip().split(',')
                if len(fields) != 4:
                    # Skip malformed rows: too few fields carry no usable data,
                    # too many would break the 4-way unpacking below.
                    continue
                userid, itemid, rating, _timestamp = fields
                try:
                    score = float(rating)
                except ValueError:
                    # A non-numeric rating is treated like any other bad row.
                    continue
                if score < 3.0:
                    # Ratings below 3 are not counted as a "like".
                    continue
                user_click.setdefault(userid, []).append(itemid)
        return user_click
    
    def get_item_info(item_file):
        """Load item metadata from a comma-separated file.

        The first line is treated as a header and skipped. Each remaining line
        is split on ``,``: the first field is the item id, the last field is
        the genres string, and everything in between is re-joined with ``,``
        to form the title (so titles containing commas survive the split).
        No CSV unquoting is performed; any quote characters stay in the title.

        Args:
            item_file: Path to the item file.

        Returns:
            A dict mapping itemid (str) to ``[title, genres]``. When an item id
            occurs more than once the first occurrence wins. An empty dict is
            returned when ``item_file`` does not exist.
        """
        if not os.path.exists(item_file):
            return {}
        item_info = {}
        # ``with`` guarantees the handle is closed even if reading raises.
        with open(item_file, encoding='utf-8') as fp:
            next(fp, None)  # Skip the header row (no-op on an empty file).
            for line in fp:
                fields = line.strip().split(',')
                if len(fields) < 3:
                    # Not enough fields for id, title and genres.
                    continue
                itemid = fields[0]
                genres = fields[-1]
                # With exactly three fields this is just fields[1].
                title = ",".join(fields[1:-1])
                if itemid not in item_info:
                    item_info[itemid] = [title, genres]
        return item_info
    
    def base_contribute_score():
        """Return the constant weight (1) used as the base contribution score."""
        contribution = 1
        return contribution
    
    
    
    
    def cal_item_sim(user_click):
        """Compute item-to-item similarity from co-occurrence in click lists.

        For every pair of positions (i, j) with i < j in one user's list, the
        co-occurrence weight of both (item_i, item_j) and (item_j, item_i) is
        increased by ``base_contribute_score()``. The similarity is then

            sim(i, j) = co(i, j) / sqrt(n(i) * n(j))

        where ``n(x)`` is the number of click-list entries equal to ``x``.

        Args:
            user_click: dict mapping userid to a list of itemids.

        Returns:
            A dict mapping itemid_i to a list of ``(itemid_j, sim_score)``
            tuples sorted by descending score. Items that never co-occur with
            another item are absent; an empty input yields an empty dict.
        """
        co_appear = {}            # itemid_i -> {itemid_j: accumulated weight}
        item_user_click_num = {}  # itemid -> number of click-list entries

        for itemlist in user_click.values():
            for index_i, itemid_i in enumerate(itemlist):
                item_user_click_num[itemid_i] = item_user_click_num.get(itemid_i, 0) + 1
                for itemid_j in itemlist[index_i + 1:]:
                    # Both items were clicked by the same user: credit the pair
                    # in both directions so the result is symmetric.
                    for src, dst in ((itemid_i, itemid_j), (itemid_j, itemid_i)):
                        related = co_appear.setdefault(src, {})
                        related[dst] = related.get(dst, 0) + base_contribute_score()

        # The similarity pass runs once, after all counts are final. Every item
        # in co_appear has been counted above, so the lookups cannot miss.
        item_sim_score_sorted = {}
        for itemid_i, relate_item in co_appear.items():
            count_i = item_user_click_num[itemid_i]
            scores = {}
            for itemid_j, co_time in relate_item.items():
                denominator = math.sqrt(count_i * item_user_click_num[itemid_j])
                scores[itemid_j] = co_time / denominator
            # Most similar items first.
            item_sim_score_sorted[itemid_i] = sorted(
                scores.items(), key=operator.itemgetter(1), reverse=True)

        return item_sim_score_sorted
    
    
    def cal_recom_result(sim_info, user_click, recent_click_num=3, topk=5):
        """Build per-user recommendations from item similarity lists.

        For each user, the first ``recent_click_num`` entries of the click list
        are used as seeds; for each seed the first ``topk`` entries of its
        similarity list are added to that user's recommendations. If the same
        item is reached from several seeds, the score from the last seed wins.

        Args:
            sim_info: dict mapping itemid to a list of ``(itemid, score)``
                pairs; the order of each list decides which ``topk`` are used.
            user_click: dict mapping userid to a list of itemids.
                NOTE(review): the slice takes the *first* entries of each list;
                whether those are the most recent clicks depends on how the
                caller ordered the list.
            recent_click_num: Number of leading click-list entries used as seeds.
            topk: Number of similar items taken per seed.

        Returns:
            A dict mapping userid to ``{itemid: score}``. Users for whom no
            recommendation could be produced are absent.
        """
        recom_info = {}
        for user, click_list in user_click.items():
            # A fresh dict per user, so users never share one another's results.
            user_recom = {}
            for itemid in click_list[:recent_click_num]:
                if itemid not in sim_info:
                    # No similarity data for this seed item.
                    continue
                for itemsim_pair in sim_info[itemid][:topk]:
                    user_recom[itemsim_pair[0]] = itemsim_pair[1]
            if user_recom:
                recom_info[user] = user_recom
        return recom_info
    
    
    def main_flow():
        """Run the ItemCF pipeline and print the recommendations for user '1'.

        Reads ``./data/ratings.csv``, computes item similarities, derives
        recommendations for every user, and prints the entry for user ``'1'``
        (an empty dict if that user has no recommendations).
        """
        # Step 1: collect each user's click sequence.
        user_click = get_user_click('./data/ratings.csv')
        # Step 2: compute item-to-item similarity.
        sim_info = cal_item_sim(user_click)
        # Step 3: recommend based on the similarities.
        recom_result = cal_recom_result(sim_info, user_click)
        # .get avoids a KeyError when user '1' is absent, e.g. when the ratings
        # file is missing and get_user_click returned an empty dict.
        print(recom_result.get('1', {}))
    
    # Run the demo pipeline only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main_flow()
    

    参考:https://www.imooc.com/learn/1029

    相关文章

      网友评论

        本文标题:推荐系统之itemcf算法代码

        本文链接:https://www.haomeiwen.com/subject/fozrqhtx.html