# 自定义皮尔森相似度计算公式
def pearson_sim(target_user, other_user, data):
    """Return the Pearson correlation between two users' ratings.

    Only items present in both users' rating collections contribute.

    Args:
        target_user: Key of the first user in ``data``.
        other_user: Key of the second user in ``data``.
        data: Mapping of user -> ratings, where each ratings object is
            iterated for item keys and indexed by item to get a numeric
            rating (presumably a dict of item -> number).

    Returns:
        The correlation coefficient, or ``0`` when the denominator is
        zero (e.g. no shared items, or one user's shared ratings are all
        identical).

    Raises:
        KeyError: If either user is missing from ``data``.
    """
    from math import sqrt

    other_ratings = data[other_user]
    target_ratings = data[target_user]

    shared_count = 0
    total_x = 0
    total_y = 0
    total_xy = 0
    total_x_sq = 0
    total_y_sq = 0

    # Walk the other user's items and keep only those the target also rated.
    for item in other_ratings:
        if item not in target_ratings:
            continue
        x = other_ratings[item]
        y = target_ratings[item]
        total_xy += x * y
        total_x += x
        total_y += y
        shared_count += 1
        total_x_sq += x ** 2
        total_y_sq += y ** 2

    # Computational form of Pearson's r:
    # (n*Sxy - Sx*Sy) / (sqrt(n*Sxx - Sx^2) * sqrt(n*Syy - Sy^2))
    numerator = shared_count * total_xy - total_x * total_y
    spread_x = sqrt(shared_count * total_x_sq - total_x ** 2)
    spread_y = sqrt(shared_count * total_y_sq - total_y ** 2)
    denominator = spread_x * spread_y

    # A zero denominator means the correlation is undefined; report 0.
    return numerator / denominator if denominator != 0 else 0
# 自定义找topk邻居
def get_neighbor(data, target_user, k=3):
    """Return the ``k`` users most similar to ``target_user``.

    Similarity is computed with ``pearson_sim`` between ``target_user``
    and every other user in ``data``.

    Args:
        data: Mapping of user -> ratings, as accepted by ``pearson_sim``.
        target_user: The user whose neighbours are wanted.
        k: Maximum number of neighbours to return (default 3).

    Returns:
        A list of at most ``k`` ``(similarity, user)`` tuples, sorted in
        descending order (by similarity, ties broken by user).
    """
    # Compare against target_user itself rather than a hard-coded user
    # name, so the function works for any target and any dataset.
    scored = [
        (pearson_sim(user, target_user, data), user)
        for user in data
        if user != target_user
    ]
    # Tuples compare by similarity first, then by user, highest first.
    scored.sort(reverse=True)
    return scored[:k]
# 自定义推荐
def rec(target_user, data, k=3):
    """Recommend movies that similar users rated but ``target_user`` has not.

    Args:
        target_user: The user to recommend for; must be a key of ``data``.
        data: Mapping of user -> ratings, where each ratings object is
            iterated for movie keys and indexed by movie for the rating
            (presumably a dict of movie -> number).
        k: Number of nearest neighbours to draw candidates from
            (default 3, forwarded to ``get_neighbor``).

    Returns:
        A list of ``(movie, rating)`` tuples sorted by rating, highest
        first. Each movie appears once, with the highest rating any of
        the neighbours gave it.
    """
    already_rated = data[target_user]

    # Keep one entry per movie: the best rating seen among the neighbours.
    # De-duplicating before sorting keeps the final order intact; passing
    # a sorted list through set() would scramble it.
    best_rating = {}
    for _, neighbor_user in get_neighbor(data, target_user, k):
        neighbor_ratings = data[neighbor_user]
        for movie in neighbor_ratings:
            if movie in already_rated:
                continue
            rating = neighbor_ratings[movie]
            if movie not in best_rating or rating > best_rating[movie]:
                best_rating[movie] = rating

    # Highest-rated candidates first; ties keep first-seen order.
    return sorted(best_rating.items(), key=lambda pair: pair[1], reverse=True)
网友评论