推荐算法---Pearson相关系数

#!/usr/local/bin/python2.7 # encoding: utf-8

import sys import os

from argparse import ArgumentParser from argparse import RawDescriptionHelpFormatter

from math import sqrt

import moivescore  # import module defined by yourself  from rope.base.prefs import Prefs from bokeh.models.tools import Scroll

# Score similarity with the Euclidean distance.
def sim_distance(prefs, p1, p2):
    """Return a distance-based similarity score between p1 and p2.

    Args:
        prefs: mapping of person -> mapping of item -> numeric rating.
        p1: key of the first person in ``prefs``.
        p2: key of the second person in ``prefs``.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the two
        people's ratings over the items both have rated; 0 when they have no
        rated item in common.

    Raises:
        KeyError: if ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Only items rated by both people take part in the comparison.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    if not shared:
        return 0

    sum_of_squares = sum(
        pow(prefs[p1][item] - prefs[p2][item], 2) for item in shared
    )
    # Adding 1 keeps the denominator non-zero and maps distance 0 to score 1.
    return 1 / (1 + sqrt(sum_of_squares))


# Score similarity with the Pearson correlation coefficient, which corrects
# for one person rating systematically higher or lower than the other.
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation of p1's and p2's ratings.

    Args:
        prefs: mapping of person -> mapping of item -> numeric rating.
        p1: key of the first person in ``prefs``.
        p2: key of the second person in ``prefs``.

    Returns:
        The correlation coefficient computed over the items both people have
        rated. Returns 0 when there is no common item or when either person's
        ratings over the common items have no variance.

    Raises:
        KeyError: if ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    if not shared:
        return 0

    # Use a float count so the divisions below never truncate when all the
    # ratings are integers (the file targets Python 2.7).
    n = float(len(shared))

    # Sum of X and sum of Y.
    sum1 = sum(prefs[p1][item] for item in shared)
    sum2 = sum(prefs[p2][item] for item in shared)

    # Sum of X^2 and sum of Y^2.
    sqsum1 = sum(pow(prefs[p1][item], 2) for item in shared)
    sqsum2 = sum(pow(prefs[p2][item], 2) for item in shared)

    # Sum of X*Y.
    psum = sum(prefs[p1][item] * prefs[p2][item] for item in shared)

    # Covariance term: sum(XY) - sum(X) * sum(Y) / n.
    num = psum - (sum1 * sum2 / n)

    variance_product = (sqsum1 - pow(sum1, 2) / n) * (sqsum2 - pow(sum2, 2) / n)
    # Floating-point cancellation can leave a tiny negative value here, which
    # would make sqrt() raise ValueError; treat it like zero variance.
    if variance_product <= 0:
        return 0

    return num / sqrt(variance_product)

# Example lookups in the ratings data (kept disabled):
# print(moivescore.critics['Lisa Rose'])  # key and value
# print(moivescore.critics['Lisa Rose']['Lady in the Water'])

# Example Euclidean-distance comparisons (kept disabled):
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Gene Seymour'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Michael Phillips'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Claudia Puig'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Mick LaSalle'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Jack Matthews'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'Toby'))
# print(sim_distance(moivescore.critics, 'Lisa Rose', 'xiaoYu'))

# Separator before the Pearson examples. The string must be delimited by
# plain ASCII quotes; typographic quotes are a syntax error.
print('——————–pearson——————————–')

# Example Pearson-correlation comparisons (kept disabled):
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Gene Seymour'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Michael Phillips'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Claudia Puig'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Mick LaSalle'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Jack Matthews'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'Toby'))
# print(sim_pearson(moivescore.critics, 'Lisa Rose', 'xiaoYu'))

# Find the people whose taste is most similar to the given person.
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the ``n`` entries of ``prefs`` most similar to ``person``.

    Args:
        prefs: mapping of person -> mapping of item -> numeric rating.
        person: key in ``prefs`` to compare everyone else against.
        n: maximum number of matches to return.
        similarity: callable ``(prefs, a, b) -> number`` used to score a pair.

    Returns:
        A list of ``(other, score)`` tuples, highest score first, at most
        ``n`` long. ``person`` itself is never included.
    """
    scores = [
        (other, similarity(prefs, person, other))
        for other in prefs
        if other != person
    ]
    # Rank on the score (second element). A plain tuple sort would order the
    # pairs by name instead, because the name is the first element.
    scores.sort(key=lambda pair: pair[1], reverse=True)
    return scores[0:n]


# print(topMatches(moivescore.critics, 'Lisa Rose', 1))

# Recommend items to a person from the similarity-weighted ratings of others.
def getRecommendation(prefs, person, similarity=sim_pearson):
    """Rank the items ``person`` has not rated by their predicted rating.

    Each other person with a positive similarity to ``person`` contributes
    their rating of an item weighted by that similarity; the predicted rating
    is the weighted sum divided by the sum of the weights.

    Args:
        prefs: mapping of person -> mapping of item -> numeric rating.
        person: key in ``prefs`` to produce recommendations for.
        similarity: callable ``(prefs, a, b) -> number`` used to score a pair.

    Returns:
        A list of ``(item, predicted_rating)`` tuples, highest predicted
        rating first.
    """
    totals = {}   # item -> sum of (similarity * rating) over contributors
    simSums = {}  # item -> sum of similarity over contributors

    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # People with zero or negative similarity do not contribute.
        if sim <= 0:
            continue

        for item in prefs[other]:
            # Only score items this person has not rated; a rating of 0 is
            # treated the same as a missing rating.
            if item not in prefs[person] or prefs[person][item] == 0:
                totals[item] = totals.get(item, 0) + prefs[other][item] * sim
                simSums[item] = simSums.get(item, 0) + sim

    # Normalise by the total weight so items rated by many people are not
    # favoured merely for having more contributors.
    rankings = [(item, total / simSums[item]) for item, total in totals.items()]
    # Rank on the predicted rating (second element). A plain tuple sort would
    # order the pairs by item name instead.
    rankings.sort(key=lambda pair: pair[1], reverse=True)
    return rankings


print(getRecommendation(moivescore.critics, 'Toby'))

    原文作者:推荐算法
    原文地址: https://yq.aliyun.com/articles/613050
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞