前言:本文是《机器学习实战》中的一个案例——餐馆菜肴推荐系统。
from numpy import *
from numpy import linalg as la
# 载入数据 (用户-菜肴矩阵)
# 行为用户, 列为菜肴, 矩阵元素表示用户对某个菜肴的评分 (0 表示未评分)
def loadExData2():
    """Return the sample rating table as a list of 11 rows of 11 integers.

    Each row holds one user's ratings and each column one dish; a value of
    0 is treated by the rest of this file as "not rated". A fresh list is
    built on every call, so callers may mutate the result freely.
    """
    return [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
# 基于欧氏距离计算两个评分向量的相似度: 1/(1+距离), 取值范围 (0, 1]
def esclidSim(inA, inB):
    """Return a Euclidean-distance-based similarity in the range (0, 1].

    The similarity is ``1 / (1 + ||inA - inB||)``, so identical vectors
    score 1.0 and the score shrinks toward 0 as the vectors move apart.

    Args:
        inA: First rating vector; must support ``len()`` and subtraction
            with ``inB`` (e.g. a NumPy array or matrix).
        inB: Second rating vector, same shape as ``inA``.

    Returns:
        1.0 when ``inA`` has fewer than three entries, otherwise the
        distance-based similarity described above.
    """
    # NOTE(review): with fewer than three entries the vectors are reported
    # as perfectly similar without looking at their values. This mirrors
    # the guard in pearsSim; confirm it is intended for a distance measure.
    if len(inA) < 3:
        return 1.0
    return 1.0 / (1.0 + la.norm(inA - inB))
# 计算两个评分的 皮尔逊相关系数 (Pearson Correlation)
def pearsSim(inA, inB):
    """Return the Pearson correlation of two rating vectors, rescaled to [0, 1].

    The raw correlation coefficient lies in [-1, 1]; it is mapped with
    ``0.5 + 0.5 * r`` so that it is comparable with the other similarity
    measures in this file.

    Args:
        inA: First rating vector (column matrix or 1-D array).
        inB: Second rating vector, same length as ``inA``.

    Returns:
        1.0 when ``inA`` has fewer than three entries (too few points for a
        meaningful correlation), otherwise the rescaled coefficient.
    """
    if len(inA) < 3:
        return 1.0
    # The two vectors must be passed as separate arguments (the original
    # wrote ``inA.inB``, an attribute access that raises AttributeError).
    # rowvar=False makes corrcoef treat each column as one variable, which
    # is what column-vector inputs require.
    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar=False)[0][1]
# 计算两个评分的余弦相似度 (Cosine similarity)
def cosSim(inA, inB):
    """Return the cosine similarity of two column vectors, rescaled to [0, 1].

    The raw cosine lies in [-1, 1]; it is mapped with ``0.5 + 0.5 * cos``.

    Args:
        inA: First rating vector as an (n, 1) ``numpy.matrix`` so that
            ``inA.T * inB`` is a matrix product yielding a single value.
        inB: Second rating vector, same shape as ``inA``.

    Returns:
        The rescaled cosine similarity. Both vectors must have a non-zero
        norm; a zero vector makes the denominator zero.
    """
    # .item() extracts the single element of the 1x1 product; calling
    # float() directly on a non-0-d array is deprecated in recent NumPy.
    num = float((inA.T * inB).item())
    denom = la.norm(inA) * la.norm(inB)
    return 0.5 + 0.5 * (num / denom)
# 基于物品的相似度推荐
def standEst(dataMat, user, sinMeas, item):
    """Estimate ``user``'s rating for ``item`` from item-to-item similarity.

    For every other item ``j`` the user has rated, the similarity between
    column ``item`` and column ``j`` is computed over the users who rated
    both, and the estimate is the similarity-weighted mean of the user's
    own ratings.

    Args:
        dataMat: User-by-item rating matrix (``numpy.matrix``; the ``.A``
            attribute is used). 0 means "not rated".
        user: Row index of the user to predict for.
        sinMeas: Callable ``(colA, colB) -> similarity`` applied to the two
            columns restricted to the co-rating users.
        item: Column index of the item whose rating is being estimated.

    Returns:
        The weighted-average estimate, or 0 when the similarities sum to 0
        (for instance when no rated item shares a rater with ``item``).
    """
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratsimTotal = 0.0
    # Loop-invariant mask of the users who rated the target item.
    ratedTarget = dataMat[:, item].A > 0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            # The user has no opinion on item j, so it cannot contribute.
            continue
        # Row indices of users who rated both `item` and `j`.
        overLap = nonzero(logical_and(ratedTarget, dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            simility = 0
        else:
            simility = sinMeas(dataMat[overLap, item], dataMat[overLap, j])
            # Only pairs that actually have co-raters are logged.
            print("the %d and %d similit is :%f" % (item, j, simility))
        simTotal += simility
        ratsimTotal += simility * userRating
    if simTotal == 0:
        return 0
    return ratsimTotal / simTotal
def recommand(dataMat, user, n=3, simMeans=cosSim, estMethod=standEst):
    """Return the ``n`` unrated items with the highest estimated rating.

    Args:
        dataMat: User-by-item rating matrix (``numpy.matrix``; the ``.A``
            attribute is used). 0 means "not rated".
        user: Row index of the user to recommend for.
        n: Maximum number of recommendations to return.
        simMeans: Similarity function forwarded to ``estMethod``.
        estMethod: Callable ``(dataMat, user, simMeans, item) -> score``
            that estimates the user's rating for one item.

    Returns:
        A list of up to ``n`` ``(item_index, estimated_score)`` tuples in
        descending score order (ties keep ascending item order because the
        sort is stable), or the string ``"you rated everything"`` when the
        user has no unrated items.
    """
    # Column indices where this user's rating is 0, i.e. not yet rated.
    unratedItems = nonzero(dataMat[user, :].A == 0)[1]
    if len(unratedItems) == 0:
        return "you rated everything"
    itemScores = [
        (item, estMethod(dataMat, user, simMeans, item))
        for item in unratedItems
    ]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:n]
# Demo: wrap the sample ratings in a NumPy matrix and print the
# recommendations for the user in row 1, using the default settings.
myMat = mat(loadExData2())
print(recommand(dataMat=myMat, user=1))
运行结果:
the 0 and 10 similit is :1.000000
the 1 and 10 similit is :1.000000
the 2 and 10 similit is :1.000000
the 6 and 3 similit is :1.000000
the 6 and 5 similit is :1.000000
the 6 and 10 similit is :1.000000
the 7 and 10 similit is :1.000000
the 8 and 10 similit is :1.000000
the 9 and 3 similit is :1.000000
the 9 and 5 similit is :1.000000
the 9 and 10 similit is :1.000000
[(6, 3.3333333333333335), (9, 3.3333333333333335), (0, 3.0)]
Process finished with exit code 0