美文网首页数据挖掘
推荐系统之itemcf算法代码

推荐系统之itemcf算法代码

作者: DreamWeave_fdbe | 来源:发表于2020-02-22 23:13 被阅读0次
#-*-coding:utf8-*-

import sys 
import math 
import operator 

import os 
def get_user_click(rating_file):
    """Collect, per user, the list of items the user rated 3.0 or higher.

    The first line of the file is treated as a header and skipped. Every
    other line is split on ',' and is expected to hold
    ``userid,itemid,rating,timestamp``.

    Args:
        rating_file: Path to the ratings file; it is read as UTF-8.

    Returns:
        A dict mapping userid (str) to a list of itemids (str) in the order
        they appear in the file. An empty dict is returned when
        ``rating_file`` does not exist.

    Raises:
        ValueError: If a line has more than four comma-separated fields or
            its rating field cannot be parsed as a float.
    """
    if not os.path.exists(rating_file):
        return {}
    user_click = {}
    # The context manager guarantees the handle is closed even when parsing
    # a line raises (the manual open()/close() pair leaked it in that case).
    with open(rating_file, encoding='utf-8') as fp:
        for line_no, line in enumerate(fp):
            # Skip the header line.
            if line_no == 0:
                continue
            item = line.strip().split(',')
            # Skip lines with missing fields.
            if len(item) < 4:
                continue
            userid, itemid, rating, _timestamp = item
            # Ratings below 3.0 are not counted as a "like".
            if float(rating) < 3.0:
                continue
            user_click.setdefault(userid, []).append(itemid)
    return user_click

def get_item_info(item_file):
    """Load item titles and genres from an item CSV file.

    The first line of the file is treated as a header and skipped. Every
    other line is split on ','; the first field is the itemid, the last
    field is the genres string, and everything in between is re-joined with
    ',' to form the title (so titles containing commas are kept whole).

    Args:
        item_file: Path to the item file; it is read as UTF-8.

    Returns:
        A dict mapping itemid (str) to ``[title, genres]``. When an itemid
        occurs more than once, the first occurrence wins. An empty dict is
        returned when ``item_file`` does not exist.
    """
    if not os.path.exists(item_file):
        return {}
    item_info = {}
    # The context manager guarantees the handle is closed even when an
    # exception interrupts the loop.
    with open(item_file, encoding='utf-8') as fp:
        for line_no, line in enumerate(fp):
            # Skip the header line.
            if line_no == 0:
                continue
            item = line.strip().split(',')
            # Skip lines with missing fields.
            if len(item) < 3:
                continue
            itemid = item[0]
            genres = item[-1]
            # For exactly three fields this is just item[1]; for more it
            # restores the commas that belonged to the title.
            title = ",".join(item[1:-1])
            item_info.setdefault(itemid, [title, genres])
    return item_info

def base_contribute_score():
    """Return the amount one co-click adds to an item pair's co-occurrence.

    The baseline scheme weights every co-click equally, so this is always 1.
    """
    score = 1
    return score




def cal_item_sim(user_click):
    """Compute item-to-item similarity from users' click lists.

    For every pair of items that appear in the same user's list, a
    contribution of ``base_contribute_score()`` is added in both directions.
    The similarity of items i and j is then

        co_appear[i][j] / sqrt(click_num[i] * click_num[j])

    where ``click_num[x]`` is how many times item x occurs across all lists.

    Args:
        user_click: dict mapping userid to a list of itemids.

    Returns:
        A dict mapping itemid_i to a list of ``(itemid_j, sim_score)`` tuples
        sorted by score in descending order. Items that never co-occur with
        another item have no entry; empty input yields an empty dict.
    """
    co_appear = {}
    item_user_click_num = {}
    for itemlist in user_click.values():
        for index_i, itemid_i in enumerate(itemlist):
            item_user_click_num[itemid_i] = item_user_click_num.get(itemid_i, 0) + 1
            for itemid_j in itemlist[index_i + 1:]:
                # The user clicked both items: credit the pair in both
                # directions so the co-occurrence table stays symmetric.
                row_i = co_appear.setdefault(itemid_i, {})
                row_i[itemid_j] = row_i.get(itemid_j, 0) + base_contribute_score()
                row_j = co_appear.setdefault(itemid_j, {})
                row_j[itemid_i] = row_j.get(itemid_i, 0) + base_contribute_score()

    # Compute the scores once, after all counts are final. Doing this inside
    # the loops above repeated the whole computation for every item visit
    # and left the result undefined when there was nothing to iterate over.
    item_sim_score = {}
    for itemid_i, relate_item in co_appear.items():
        for itemid_j, co_time in relate_item.items():
            if itemid_i not in item_user_click_num or itemid_j not in item_user_click_num:
                continue
            # Denominator of the similarity formula.
            fenmu = math.sqrt(item_user_click_num[itemid_i] * item_user_click_num[itemid_j])
            item_sim_score.setdefault(itemid_i, {})[itemid_j] = co_time / fenmu

    # Sort each item's neighbours by similarity, highest first.
    item_sim_score_sorted = {}
    for itemid, neighbours in item_sim_score.items():
        item_sim_score_sorted[itemid] = sorted(
            neighbours.items(), key=operator.itemgetter(1), reverse=True)
    return item_sim_score_sorted


def cal_recom_result(sim_info, user_click, recent_click_num=3, topk=5):
    """Build per-user recommendations from item similarities.

    For each user, the first ``recent_click_num`` entries of the click list
    are used as seeds, and for each seed the first ``topk`` entries of its
    similarity list are recommended. If the same item is reached through
    several seeds, the score from the last seed processed is kept.

    Args:
        sim_info: dict mapping itemid to a list of ``(itemid, score)`` pairs,
            as returned by ``cal_item_sim``.
        user_click: dict mapping userid to a list of itemids.
        recent_click_num: How many leading click-list entries to use as seeds.
        topk: How many similar items to take per seed.

    Returns:
        A dict mapping userid to ``{itemid: recom_score}``. Each user gets an
        independent dict; users for whom no recommendation was produced have
        no entry.
    """
    recom_result = {}
    for user, click_list in user_click.items():
        for itemid in click_list[:recent_click_num]:
            # Seeds with no similarity information contribute nothing.
            if itemid not in sim_info:
                continue
            for itemsim_zuhe in sim_info[itemid][:topk]:
                itemsimid = itemsim_zuhe[0]
                itemsimscore = itemsim_zuhe[1]
                # A fresh dict per user: sharing one dict across users made
                # every user receive everyone's recommendations.
                recom_result.setdefault(user, {})[itemsimid] = itemsimscore
    return recom_result


def main_flow():
    """Run the ItemCF pipeline on ./data/ratings.csv and print user '1' results.

    Prints the recommendation dict for user '1', or an empty dict when that
    user has no recommendations (for example when the ratings file is
    missing and the click data is therefore empty).
    """
    # Step 1: collect each user's click list.
    user_click = get_user_click('./data/ratings.csv')
    # Step 2: compute item-to-item similarity.
    sim_info = cal_item_sim(user_click)
    # Step 3: recommend items based on the similarities.
    recom_result = cal_recom_result(sim_info, user_click)
    # Use .get so a missing user does not abort the script with KeyError.
    print(recom_result.get('1', {}))

# Run the demo pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main_flow()

参考:https://www.imooc.com/learn/1029

相关文章

网友评论

    本文标题:推荐系统之itemcf算法代码

    本文链接:https://www.haomeiwen.com/subject/fozrqhtx.html