《推荐系统实践》第2章源码

import math

def UserSimilarity(train):
    """Compute pairwise cosine similarity between users.

    The similarity of users ``u`` and ``v`` is the number of items they
    share divided by ``sqrt(n_u * n_v)``, where ``n_u`` is the number of
    distinct items of ``u``.

    Args:
        train: Mapping of user -> iterable of items for that user.

    Returns:
        A nested dict ``W`` with ``W[u][v]`` holding the similarity of
        ``u`` and ``v``. Only pairs of distinct users that share at least
        one item are present; a user sharing nothing with anyone has no
        entry at all.

    Side effects:
        Prints the three intermediate tables (inverted index,
        co-occurrence counts, per-user item counts) to stdout.
    """
    # Inverted index: item -> set of users that have the item. Using a set
    # means a duplicated item in a user's list is only counted once.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    print(item_users)
    # Output recorded by the original author for the sample data (Python 2 repr):
    #{'a': set(['A', 'B']), 'c': set(['B', 'D']), 'b': set(['A', 'C']), 'e': set(['C', 'D']), 'd': set(['A', 'D'])}

    # C[u][v]: number of items shared by u and v (u != v).
    # N[u]: number of distinct items of u.
    C = dict()
    N = dict()
    for users in item_users.values():
        for u in users:
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                shared = C.setdefault(u, {})
                shared[v] = shared.get(v, 0) + 1

    print(C)
    print(N)
    # Output recorded by the original author for the sample data:
    #{'A': {'C': 1, 'B': 1, 'D': 1}, 'C': {'A': 1, 'D': 1}, 'B': {'A': 1, 'D': 1}, 'D': {'A': 1, 'C': 1, 'B': 1}}
    #{'A': 3, 'C': 2, 'B': 2, 'D': 3}

    # Normalise the co-occurrence counts. math.sqrt returns a float, so the
    # division is a true division on both Python 2 and Python 3.
    W = dict()
    for u, related_users in C.items():
        W[u] = {
            v: cuv / math.sqrt(N[u] * N[v])
            for v, cuv in related_users.items()
        }
    return W

# Sample data: each user maps to the list of items for that user.
train = {
    'A': ['a', 'b', 'd'],
    'B': ['a', 'c'],
    'C': ['b', 'e'],
    'D': ['c', 'd', 'e'],
}

F = UserSimilarity(train)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(F)

# Output recorded by the original author:
#{'A': {'C': 0.4082482904638631, 'B': 0.4082482904638631, 'D': 0.3333333333333333}, 'C': {'A': 0.4082482904638631, 'D': 0.4082482904638631}, 'B': {'A': 0.4082482904638631, 'D': 0.4082482904638631}, 'D': {'A': 0.3333333333333333, 'C': 0.4082482904638631, 'B': 0.4082482904638631}}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容