FCM聚类算法

之前提到了K均值算法,多数讨论认为K均值与硬C均值(HCM)算法本质相同。模糊C均值(FCM)算法则在HCM的基础上加入了对聚类簇的模糊划分,引入隶属度来提升算法性能。

import copy,math,random,time,sys
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import decimal


# Upper bound (inclusive) of the random integers that init_U draws when it
# builds the initial membership matrix.
MAX = 10000.0
# Convergence threshold: end_iterate reports convergence once no membership
# entry differs from its previous value by more than this amount.
Epsilon = 0.000001

def randomise(data):
    """Return a shuffled copy of ``data`` together with the permutation used.

    Args:
        data: Sequence of samples. It is not modified.

    Returns:
        A ``(new_data, order)`` tuple in which ``new_data[i]`` is
        ``data[order[i]]``. ``order`` can be passed to ``de_randomise`` to
        put results back into the original sample order.
    """
    # The shuffled index list doubles as the record of the original positions.
    order = list(range(len(data)))
    random.shuffle(order)
    new_data = [data[source] for source in order]
    return new_data, order

def de_randomise(data, order):
    """Undo a shuffle: place ``data[i]`` back at position ``order[i]``.

    Args:
        data: Sequence in shuffled order.
        order: Permutation describing where each shuffled item came from.

    Returns:
        A new list with the same length as ``data``; slots that ``order``
        never addresses are left as empty lists.
    """
    restored = [[] for _ in range(len(data))]
    for position, original_index in enumerate(order):
        restored[original_index] = data[position]
    return restored

def print_matrix(lists):
    """Print every row of ``lists`` on its own line.

    Args:
        lists: Iterable of rows, for example a list of lists.
    """
    for row in lists:
        # Calling print with a single argument gives the same output on
        # Python 2 and Python 3.
        print(row)

        
def init_U(data, clu_num):
    """Build a random membership matrix with one normalised row per sample.

    Args:
        data: Samples; only ``len(data)`` is used.
        clu_num: Number of clusters, i.e. the number of entries per row.

    Returns:
        A list of ``len(data)`` rows. Each row holds ``clu_num`` floats made
        by dividing random integers from ``[1, int(MAX)]`` by their row
        total, so the entries of a non-empty row add up to 1 (up to
        floating-point rounding).
    """
    memberships = []
    for _sample in range(len(data)):
        draws = [random.randint(1, int(MAX)) for _ in range(clu_num)]
        # A float total keeps the normalisation a true division.
        total = sum(draws, 0.0)
        memberships.append([draw / total for draw in draws])
    return memberships

                
def distance(v1, v2):
    """Return the Euclidean distance between two equal-length vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers.

    Returns:
        The Euclidean distance as a float, or ``-1`` when the two vectors
        have different lengths.
    """
    if len(v1) != len(v2):
        return -1  # sentinel for mismatched dimensions
    # Pairwise differences work for plain lists as well as array types.
    return math.sqrt(sum((b - a) ** 2 for a, b in zip(v1, v2)))

def end_iterate(U, U_old):
    """Tell whether the membership matrix has stopped changing.

    Args:
        U: Current membership matrix (list of rows).
        U_old: Membership matrix from the previous iteration.

    Returns:
        ``False`` as soon as one entry differs from its previous value by
        more than ``Epsilon``; ``True`` otherwise. The number of columns
        inspected in every row is the length of the first row of ``U``.
    """
    changed = any(
        abs(U[row][col] - U_old[row][col]) > Epsilon
        for row in range(len(U))
        for col in range(len(U[0]))
    )
    return not changed

def normalise_U(U):
    """Harden a membership matrix in place into 0/1 indicators.

    In every row the entries equal to the row maximum become ``1`` and all
    other entries become ``0``; ties therefore yield several ones.

    Args:
        U: Membership matrix (list of rows). It is modified in place. The
            number of columns processed per row is the length of the first
            row.

    Returns:
        The same ``U`` object after modification.
    """
    for row in U:
        peak = max(row)
        for col in range(len(U[0])):
            row[col] = 0 if row[col] != peak else 1
    return U


def _fcm_centres(data, U, clu_num, m):
    """Return the cluster centres as membership-weighted means of ``data``."""
    centres = []
    for i in range(clu_num):
        # u_ki ** m is the same for every coordinate, so compute it once.
        weights = [row[i] ** m for row in U]
        total = sum(weights)
        centres.append([
            sum(w * point[j] for w, point in zip(weights, data)) / total
            for j in range(len(data[0]))
        ])
    return centres


def _fcm_memberships(distance_mat, clu_num, m):
    """Return the membership matrix implied by sample-to-centre distances."""
    # Float literal so the exponent is not truncated by integer division
    # when m is an int under Python 2.
    exponent = 2.0 / (m - 1)
    memberships = []
    for dists in distance_mat:
        coincident = [k for k in range(clu_num) if dists[k] == 0]
        if coincident:
            # The sample lies exactly on one or more centres; the general
            # formula would divide by zero, so share membership among them.
            share = 1.0 / len(coincident)
            memberships.append(
                [share if k in coincident else 0.0 for k in range(clu_num)]
            )
            continue
        memberships.append([
            1.0 / sum((dists[i] / dists[k]) ** exponent
                      for k in range(clu_num))
            for i in range(clu_num)
        ])
    return memberships


def fuzz_c_mean(data, clu_num, m, max_iterate):
    """Cluster ``data`` with the fuzzy C-means (FCM) algorithm.

    Starting from a random membership matrix, each iteration recomputes the
    cluster centres as membership-weighted means, measures every sample's
    distance to every centre, and derives new memberships from the distance
    ratios. Iteration stops when ``end_iterate`` reports that memberships no
    longer change, or when ``max_iterate`` iterations have been performed
    (at least one iteration always runs). A status message is printed in
    either case.

    Args:
        data: Sequence of samples, each a sequence of numbers of equal
            length.
        clu_num: Number of clusters.
        m: Fuzziness exponent; must not equal 1 because the membership
            update divides by ``m - 1``.
        max_iterate: Maximum number of iterations.

    Returns:
        The membership matrix after hardening by ``normalise_U``: one row per
        sample, with ``1`` in the column(s) of highest membership and ``0``
        elsewhere. An empty list is returned for empty ``data``.
    """
    if len(data) == 0:
        return []

    U = init_U(data, clu_num)
    current_iterate = 0
    while True:
        current_iterate += 1
        # A brand-new matrix is built below, so keeping a reference to the
        # current one is enough to compare against afterwards.
        U_old = U

        C = _fcm_centres(data, U, clu_num, m)
        distance_mat = [
            [distance(point, centre) for centre in C] for point in data
        ]
        U = _fcm_memberships(distance_mat, clu_num, m)

        if end_iterate(U, U_old):  # memberships have converged
            print('mission complete')
            break
        if current_iterate >= max_iterate:  # iteration budget exhausted
            print('iterate overflow')
            break

    return normalise_U(U)

    原文作者:聚类算法
    原文地址: https://blog.csdn.net/li_huifei/article/details/78207494
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞