rock聚类算法的实现

Newton毕设用到rock算法做博客聚类分析,对rock算法的实现最初也比较困惑,主要是rock算法其中用到的优先堆,后来终于搞清楚它的实现了,现在贴出来供大家以后实现rock算法参考,如果有错误请指正,同时欢迎就此算法进行探讨。其中用到的平衡树代码在前面的文章中已经贴了出来。

ROCK是Sudipto Guha等人于1999年提出的一个著名的面向分类属性数据的聚类算法,其突出贡献是采用公共近邻(链接)数这一全局信息作为评价数据点间相关性的度量标准,而不是传统的基于两点间距离的局部度量函数。

 A 预处理计算公共点的数量

procedure compute_links(S)
begin
1 Compute inlist[i] for every point i in S
2 Set link[i,j] to be zero for all i,j
3 for i := 1 to n do {
4     N := inlist[i];
5     for j := 1 to |N|-1 do
6         for l := j+1 to |N| do
7             link[ N[j], N[l] ] := link[ N[j], N[l] ] + 1
8 }
end
其中 inlist[i] 表示指向 i 点的所有点的集合。

此计算连接度的方法为预处理,仅仅被执行一次。
B rock算法主体
 
procedure cluster(S,k)
begin
1. link := compute_links(S)
2. for each s ∈ S do
3.     q[s] := build_local_heap(link,s)
4. Q := build_global_heap(S,q)
5. while size(Q) > k do {
6.     u := extract_max(Q)
7.     v := max(q[u])
8.     delete(Q,v)
9.     w := merge(u,v)
10.    for each x ∈ q[u] ∪ q[v] {
11.        link[x,w] := link[x,u] + link[x,v]
12.        delete(q[x],u); delete(q[x],v)
13.        insert(q[x],w,g(x,w)); insert(q[w],x,g(x,w))
14.        update(Q,x,q[x])
15.    }
16.    insert(Q,w,q[w])
17.    deallocate(q[u]); deallocate(q[v])
18. }
end
注意到算法中有两种队列:全局队列Q和普通队列q[i]。
1 算法中的全局队列Q具有的操作是:
1)取出最大元素的编号 extract_max(Q)
2)删除指定编号的元素 delete(Q,v)
3)更新Q中任意元素 update(Q,x,q[x])
4)求Q中元素个数 size(Q)
5)插入元素 insert(Q,w,q[w])

数组中的 local_heap q[u] 进行的操作是:
1)取最大元素的编号 max(q[u])
2)删除任意元素 delete(q[x],u)
3)插入元素 insert(q[x],w,g(x,w))
4)销毁q[u] deallocate(q[u])
注意到Q中的操作(3)和(2),以及q[u]中的(2)和(3),都需要按照编号来进行删除、插入以及更新操作,所以不应该是用二叉堆实现的,仔细想想应该是用树形结构模拟的堆加上一个按编号索引的数组来实现的。同时虽然Q是一个普通队列的集合,但是仔细想想并不需要将Q中的元素按照local_heap存储,因为Q中只需要每个local_heap的最大元素就可以了,因此可以考虑用一个编号和此编号的local_heap最大值来代表Q中的此堆,好处是加快查找速度和节省空间。
好了,不废话了,看看实现的代码吧,调用例子由后面注释中的例子给出。其中用到的平衡树就是前面文章中贴出的平衡树(代码中的 NewtonAVLTree)。

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;

namespace DataStructure
{

    public class RockType
    {
        public int id;
        public double gkey;
        public RockType(int id,double gkey)
        {
            this.id = id;
            this.gkey = gkey;
        }
        public static int CompareFunc(RockType a, RockType b)
        {
            int gCompare=a.gkey.CompareTo(b.gkey);
            if (gCompare == 0) return a.id – b.id;
            return gCompare;
        }
    }
    public class RockQueue
    {
 //       public const int N=1100; 
        NewtonAVLTree<RockType> pTree;
        public RockType[] rt;
        public int id;
        int count=0;
        public int Capacity
        {
            get
            {
                return rt.Length;
            }
        }
        public RockQueue(int len)
        {
            pTree = new NewtonAVLTree<RockType>();
            rt = new RockType[len];
        }
        public bool Delete(int id) //id为要删除的编号 
        {
            if (rt[id] == null)
                return false;
            RockType pData = rt[id];
            bool flag = pTree.AVLTree_Delete(pData, RockType.CompareFunc);
            if (flag)
            {
                count–;
                rt[id] = null;
            }
            return flag;
        }
        public bool Insert(RockType pRT)
        {
            if (rt[pRT.id] != null) return false;
            RockType pData=pRT;
            rt[pRT.id] = pRT;
            bool flag = pTree.AVLTree_Insert(pData, RockType.CompareFunc);
            if (flag) count++;
            return flag;
        }
        public bool Insert(int id,double gkey)
        {
            RockType pRT=new RockType(id,gkey);
            return Insert(pRT);
        }
        public RockType ExtractMax()
        {
            NewtonTreeNode<RockType> maxNode = pTree.AVLTree_GetMax();
            pTree.AVLTree_Delete(maxNode.pData, RockType.CompareFunc);
            count–;
            rt[maxNode.pData.id] = null;
            return maxNode.pData;
        }
        public RockType GetMax()
        {
            return pTree.AVLTree_GetMax().pData;
        }
        public int Size()
        {
            return count;
        }
        public bool Update(int id,double gkey)
        {
            if(rt[id]==null) return false;
            RockType newData = new RockType(id, gkey);
            RockType oldData = rt[id];
            if (newData.Equals(oldData))
            {
                Console.WriteLine(“插入相同的值”);
                return false;
            }
            pTree.AVLTree_Delete(oldData, RockType.CompareFunc);
            pTree.AVLTree_Insert(newData, RockType.CompareFunc);
            rt[id] = newData;
            return true;
        }
        public static int CompareFunc(RockQueue a, RockQueue b)
        {
            return RockType.CompareFunc(a.GetMax(), b.GetMax());
        }
    }
 
    public class RockFunc
    {
        /// <summary>
        /// 计算inlist和linkNum
        /// </summary>
        /// <param name=”u”>存放节点间的指向关系</param>
        /// <param name=”len”>len是节点的个数</param>
        public static LINK compute_linkNum(bool[,] u, int len,int k)
        {
            LINK link=new LINK(len,k);
             /// <summary>
            /// inlist[i,]是指向i节点的所有节点集合
            /// </summary>
            int[,] inlist;

            /// <summary>
            /// top[i]为指向第i个节点的点的个数,就是inlist[i,]中元素个数
            /// </summary>
            int[] top;
            inlist = new int[len, len];
            top = new int[len];
            int i = 0, j = 0;
            for (i = 0; i < len; i++)
            {
                for (j = 0; j < len; j++)
                {
                    if (u[j, i]) inlist[i, top[i]++] = j;
                }
            }
            for (i = 0; i < len; i++)
            {
                int sumn = top[i];
                for (j = 0; j < sumn – 1; j++)
                    for (int l = j+1; l < sumn; l++)
                    {
                        link.linkNum[inlist[i, j], inlist[i, l]]++;
                        link.linkNum[inlist[i, l], inlist[i, j]]++;
                    }
            }
            return link;
        }
        public static double gFunc(int i, int j, LINK link)
        {
            const double w=0.5;
            double mi=1.0+2.0*w;
            return ((double)link.linkNum[i, j]) / (
                Math.Pow((link.setN[i] + link.setN[j]), mi) – Math.Pow(link.setN[i], mi) – Math.Pow(link.setN[j], mi));
        }
        public static RockQueue build_local_heap(LINK link, int s)
        {
            RockQueue qs = new RockQueue(link.setN.Length);
            for (int i = 0; i < link.len; i++)
            {
                if(i==s) continue;
                double gkey=gFunc(i,s,link);
                qs.Insert(i, gkey);
            }
            return qs;
        }
        public static RockQueue build_global_heap(LINK link,RockQueue[] q)
        {
            RockQueue gheap = new RockQueue(link.setN.Length);
            for (int i = 0; i < link.len; i++)
            {
                RockType iMax = q[i].GetMax();
                RockType iRT = new RockType(i, iMax.gkey);
                gheap.Insert(iRT);
            }
            return gheap;
        }
        public static int size(RockQueue Q)
        {
            return Q.Size();
        }
        public static int extract_max(RockQueue Q)
        {
            return Q.ExtractMax().id;
        }
        public static int max(RockQueue qu)
        {
            return qu.GetMax().id;
        }
        public static bool delete(RockQueue queue, int id)
        {
            if (queue == null) return false;
            return queue.Delete(id);
        }
        public static int merge(int u, int v,ref LINK link)
        {
            int w=link.newElement();
            link.setN[w] = link.setN[u] + link.setN[v];
            link.sets[u] = link.sets[v] = w;
            return w;
        }
        public static List<int> UnionElements(int u, int v,RockQueue[] q)
        {
            int len = q[u].Capacity;
            List<int> list = new List<int>(len);
            for (int i = 0; i < len; i++)
            {
                if (q[u].rt[i] != null || q[v].rt[i] != null)
                {
                    list.Add(i);
                }
            }
            return list;
        }
        public static void deallocate(ref RockQueue queue)
        {
            queue = null;
        }
        public static bool insert(RockQueue queue,int w,double gkey)
        {
            if (queue == null) return false;
            return queue.Insert(w, gkey);
        }
        public static bool insert(RockQueue Q, int w, RockQueue qw)
        {
            return Q.Insert(w, qw.GetMax().gkey);
        }
        public static void update(RockQueue Q, int x, RockQueue qx)
        {
            if (qx == null) return;
            Q.Update(x, qx.GetMax().gkey);
        }

    }
    public class LINK
    {
        /// <summary>
        /// linkNum[i,j]表示节点i和j公共指向节点的个数
        /// </summary>
        public int[,] linkNum;
        public int len,now;
        public int[] setN;
        public int[] sets;
        public int newElement()
        {
            return now++;
        }
        public LINK(int Length,int k)
        {
            len = Length + Length-k;
            this.linkNum = new int[len, len];
            this.setN = new int[len];
            this.sets = new int[len];
            for (int i = 0; i < len; i++)
            {
                setN[i] = 1;
                sets[i] = i;
            }
            len = Length;
            now = len;
        }
    }

    public class RockCluster
    {
        LINK link;
        public int[] RockAlgorithm(int k, bool[,] S, int len)
        {
            link = RockFunc.compute_linkNum(S, len,k);
            RockQueue[] q=new RockQueue[len+ len – k];
            for (int s = 0; s < len; s++)
                q[s] = RockFunc.build_local_heap(link, s);
            RockQueue Q = RockFunc.build_global_heap(link, q);
            while (RockFunc.size(Q) > k)
            {
                int u = RockFunc.extract_max(Q);
                int v = RockFunc.max(q[u]);
                RockFunc.delete(Q, v);
                int w = RockFunc.merge(u, v, ref link);
                q[w] = new RockQueue(len + len – k);
                List<int> xset = RockFunc.UnionElements(u, v, q);
                
                foreach (int x in xset)
                {
                    link.linkNum[x, w] = link.linkNum[x, u] + link.linkNum[x, v];
                    RockFunc.delete(q[x], u); RockFunc.delete(q[x], v);
                    RockFunc.insert(q[x],w,RockFunc.gFunc(x,w,link)); 
                    if(Q.rt[x]!=null) RockFunc.insert(q[w],x,RockFunc.gFunc(x,w,link));
              RockFunc.update(Q,x,q[x]); 
                }
                RockFunc.insert(Q,w,q[w]);
                RockFunc.deallocate(ref q[u]); RockFunc.deallocate(ref q[v]);
            }
            
            int[] bloggerGroupIds = new int[len + len – k];
            link.sets.CopyTo(bloggerGroupIds, 0);
            for (int i = 0; i < len; i++)
            {
                int mid=i;
                while (mid != bloggerGroupIds[mid])
                {
                    mid = bloggerGroupIds[mid];
                }
                bloggerGroupIds[i] = mid;
            }
            int[] bloggerGroupIds2 = new int[len];
            for (int i = 0; i < len; i++)
                bloggerGroupIds2[i] = bloggerGroupIds[i];
            return bloggerGroupIds2;
        }
    }
}

//   public static void showBloggerIds(int[] BloggerIds)
//        {
//            for (int i = 0; i < BloggerIds.Length; i++)
//                Console.WriteLine("{0},{1}",i,BloggerIds[i]);
//        }
//        static void Main(string[] args)
//        {
//            RockCluster rc = new RockCluster();
//            bool [,]S=new bool[5,5];
//            S[0, 4] = true;
//            S[1, 2] = true;
//            S[2, 4] = true;
//            S[3, 4] = true;
//            S[2, 3] = true;
//            S[0, 3] = true;
//            S[4, 2] = true;
//            int len=5;
//            int []BloggerIds = rc.RockAlgorithm(2, S, len);
//            showBloggerIds(BloggerIds);
//}

    原文作者:聚类算法
    原文地址: https://blog.csdn.net/neuqxzy/article/details/44452901
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞