Newton毕设用到rock算法做博客聚类分析,对rock算法的实现最初也比较困惑,主要是rock算法其中用到的优先堆,后来终于搞清楚它的实现了,现在贴出来供大家以后实现rock算法参考,如果有错误请指正,同时欢迎就此算法进行探讨。其中用到的平衡树代码在前面的文章中已经贴了出来。
ROCK是Sudipto Guha等人于1999年提出的一个著名的面向分类属性数据的聚类算法,其突出贡献是采用公共近邻(链接)数这一全局信息作为评价数据点间相关性的度量标准,而不是传统的基于两点间距离的局部度量函数。
A 预处理:计算公共点(链接)的数量
procedure compute_links(S)
begin
1. Compute inlist[i] for every point i in S
2. Set link[i,j] to be zero for all i,j
3. for i := 1 to n do {
4.     N := inlist[i];
5.     for j := 1 to |N|-1 do
6.         for l := j+1 to |N| do
7.             link[N[j], N[l]] := link[N[j], N[l]] + 1
8. }
end
其中:
1 inlist[i] 表示指向 i 点的点的集合;
2 此计算连接度的方法为预处理,仅仅被执行一次。
B rock算法主体
procedure cluster(S,k)
begin
1. link := compute_links(S)
2. for each s ∈ S do
3.     q[s] := build_local_heap(link,s)
4. Q := build_global_heap(S,q)
5. while size(Q) > k do {
6.     u := extract_max(Q)
7.     v := max(q[u])
8.     delete(Q,v)
9.     w := merge(u,v)
10.    for each x ∈ q[u] ∪ q[v] {
11.        link[x,w] := link[x,u] + link[x,v]
12.        delete(q[x],u); delete(q[x],v)
13.        insert(q[x],w,g(x,w)); insert(q[w],x,g(x,w))
14.        update(Q,x,q[x])
15.    }
16.    insert(Q,w,q[w])
17.    deallocate(q[u]); deallocate(q[v])
18. }
end
注意到算法中有两种队列:全局队列Q和普通队列q[i]。
1 算法中的全局队列 Q 具有的操作是:
(1)取出(并删除)最大元素,返回其编号:extract_max(Q)
(2)按编号删除元素:delete(Q,v)
(3)更新 Q 中任意元素:update(Q,x,q[x])
(4)求 Q 中元素个数:size(Q)
(5)插入元素:insert(Q,w,q[w])
2 数组中的 local_heap q[u] 进行的操作是:
(1)取最大元素的编号:max(q[u])
(2)删除任意元素:delete(q[x],u)
(3)插入元素:insert(q[x],w,g(x,w))
(4)销毁 q[u]:deallocate(q[u])
注意到Q中的操作(3)和(2),以及q[u]中的操作(2)和(3),都需要按照编号来进行删除、插入以及更新,所以不应该是用二叉堆实现的;仔细想想,应该是用树形结构模拟的堆加上一个按编号索引的数组来实现的。同时,虽然Q是一个普通队列的集合,但是仔细想想并不需要将Q中的元素按照local_heap存储,因为Q中只需要每个堆的最大元素就可以了,因此可以考虑用一个编号和此编号的local_heap最大值来代表Q中的此堆,好处是加快查找速度和节省空间。
好了,不废话了,看看实现的代码吧,调用例子由后面的注释例子给出。其中用到的平衡树就是前面文章中已经贴出的平衡树(NewtonAVLTree)代码。
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
namespace DataStructure
{
/// <summary>
/// A queue entry used by the ROCK implementation: an element id paired with
/// the priority key the queues are ordered by.
/// </summary>
public class RockType
{
    /// <summary>Identifier of the element; also used as the index into <c>RockQueue.rt</c>.</summary>
    public int id;

    /// <summary>Priority key of the element.</summary>
    public double gkey;

    /// <summary>Creates an entry with the given id and key.</summary>
    /// <param name="id">Element identifier.</param>
    /// <param name="gkey">Priority key.</param>
    public RockType(int id, double gkey)
    {
        this.id = id;
        this.gkey = gkey;
    }

    /// <summary>
    /// Orders entries by <see cref="gkey"/> first and by <see cref="id"/> second,
    /// so two entries compare equal only when both fields match.
    /// </summary>
    /// <param name="a">Left operand.</param>
    /// <param name="b">Right operand.</param>
    /// <returns>A negative value, zero, or a positive value when <paramref name="a"/>
    /// sorts before, equal to, or after <paramref name="b"/>.</returns>
    public static int CompareFunc(RockType a, RockType b)
    {
        int gCompare = a.gkey.CompareTo(b.gkey);
        if (gCompare != 0)
        {
            return gCompare;
        }

        // CompareTo rather than subtraction: "a.id - b.id" wraps around for ids
        // that are far apart (e.g. int.MinValue and 1) and would report the wrong order.
        return a.id.CompareTo(b.id);
    }
}
/// <summary>
/// Priority queue addressable by element id. Entries are kept in an AVL tree
/// ordered by <see cref="RockType.CompareFunc"/>, and <see cref="rt"/> maps an id
/// to its entry so that delete/update by id can locate the entry to remove.
/// </summary>
public class RockQueue
{
    NewtonAVLTree<RockType> pTree;

    /// <summary>Entry currently stored for each id, or null when the id is not in the queue.</summary>
    public RockType[] rt;

    public int id;

    int count = 0;

    /// <summary>Number of distinct ids the queue can index (length of <see cref="rt"/>).</summary>
    public int Capacity
    {
        get
        {
            return rt.Length;
        }
    }

    /// <summary>Creates an empty queue able to hold ids in the range [0, len).</summary>
    /// <param name="len">Size of the id index.</param>
    public RockQueue(int len)
    {
        pTree = new NewtonAVLTree<RockType>();
        rt = new RockType[len];
    }

    /// <summary>Removes the entry with the given id.</summary>
    /// <param name="id">Id of the entry to remove.</param>
    /// <returns>True when an entry was removed; false when the id was not present
    /// or the tree reported a failed delete.</returns>
    public bool Delete(int id)
    {
        RockType pData = rt[id];
        if (pData == null)
        {
            return false;
        }

        bool flag = pTree.AVLTree_Delete(pData, RockType.CompareFunc);
        if (flag)
        {
            count--;
            rt[id] = null;
        }
        return flag;
    }

    /// <summary>Adds an entry unless its id is already present.</summary>
    /// <param name="pRT">Entry to add.</param>
    /// <returns>True when the entry was added.</returns>
    public bool Insert(RockType pRT)
    {
        if (rt[pRT.id] != null)
        {
            return false;
        }

        bool flag = pTree.AVLTree_Insert(pRT, RockType.CompareFunc);
        if (flag)
        {
            // Record the entry only after the tree accepted it, so the id index
            // and the element count never disagree with the tree contents.
            rt[pRT.id] = pRT;
            count++;
        }
        return flag;
    }

    /// <summary>Adds an entry built from <paramref name="id"/> and <paramref name="gkey"/>.</summary>
    /// <returns>True when the entry was added.</returns>
    public bool Insert(int id, double gkey)
    {
        return Insert(new RockType(id, gkey));
    }

    /// <summary>Removes and returns the largest entry.</summary>
    /// <exception cref="InvalidOperationException">The queue is empty.</exception>
    public RockType ExtractMax()
    {
        if (count == 0)
        {
            throw new InvalidOperationException("RockQueue is empty.");
        }

        RockType maxData = pTree.AVLTree_GetMax().pData;
        pTree.AVLTree_Delete(maxData, RockType.CompareFunc);
        count--;
        rt[maxData.id] = null;
        return maxData;
    }

    /// <summary>Returns the largest entry without removing it.</summary>
    /// <exception cref="InvalidOperationException">The queue is empty.</exception>
    public RockType GetMax()
    {
        if (count == 0)
        {
            throw new InvalidOperationException("RockQueue is empty.");
        }

        return pTree.AVLTree_GetMax().pData;
    }

    /// <summary>Number of entries currently in the queue.</summary>
    public int Size()
    {
        return count;
    }

    /// <summary>Changes the key of the entry with the given id.</summary>
    /// <param name="id">Id of the entry to update.</param>
    /// <param name="gkey">New key.</param>
    /// <returns>False when the id is not in the queue; true otherwise.</returns>
    public bool Update(int id, double gkey)
    {
        RockType oldData = rt[id];
        if (oldData == null)
        {
            return false;
        }

        RockType newData = new RockType(id, gkey);

        // The previous "same value" test used Object.Equals on a freshly allocated
        // object, which is reference equality and therefore never true. Compare by
        // value instead and skip the delete/re-insert when nothing changes; the
        // queue contents and the return value are the same as before.
        if (RockType.CompareFunc(newData, oldData) == 0)
        {
            return true;
        }

        pTree.AVLTree_Delete(oldData, RockType.CompareFunc);
        pTree.AVLTree_Insert(newData, RockType.CompareFunc);
        rt[id] = newData;
        return true;
    }

    /// <summary>Orders two queues by their current largest entries.</summary>
    public static int CompareFunc(RockQueue a, RockQueue b)
    {
        return RockType.CompareFunc(a.GetMax(), b.GetMax());
    }
}
/// <summary>
/// Static helpers that mirror the primitive operations of the ROCK pseudocode
/// (compute_links, build_local_heap, build_global_heap, merge, ...).
/// </summary>
public class RockFunc
{
    /// <summary>
    /// Builds the in-lists and counts, for every pair of nodes, how many nodes
    /// both of them point to.
    /// </summary>
    /// <param name="u">Adjacency matrix; <c>u[j, i]</c> is true when node j points to node i.</param>
    /// <param name="len">Number of nodes.</param>
    /// <param name="k">Target number of clusters; forwarded to the <see cref="LINK"/> constructor.</param>
    /// <returns>A <see cref="LINK"/> whose <c>linkNum</c> holds the symmetric pair counts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="u"/> is null.</exception>
    public static LINK compute_linkNum(bool[,] u, int len, int k)
    {
        if (u == null)
        {
            throw new ArgumentNullException("u");
        }

        LINK link = new LINK(len, k);

        // inlist[i, 0 .. top[i]-1] lists the nodes that point to node i.
        int[,] inlist = new int[len, len];
        // top[i] is the number of nodes pointing to node i.
        int[] top = new int[len];

        for (int i = 0; i < len; i++)
        {
            for (int j = 0; j < len; j++)
            {
                if (u[j, i])
                {
                    inlist[i, top[i]++] = j;
                }
            }
        }

        // Every unordered pair inside one in-list shares node i as a common target.
        for (int i = 0; i < len; i++)
        {
            int sumn = top[i];
            for (int j = 0; j < sumn - 1; j++)
            {
                for (int l = j + 1; l < sumn; l++)
                {
                    int a = inlist[i, j];
                    int b = inlist[i, l];
                    link.linkNum[a, b]++;
                    link.linkNum[b, a]++;
                }
            }
        }
        return link;
    }

    /// <summary>
    /// Merge priority of clusters <paramref name="i"/> and <paramref name="j"/>:
    /// their link count divided by (ni + nj)^m - ni^m - nj^m, where ni and nj are
    /// the cluster sizes and m = 1 + 2w.
    /// </summary>
    /// <returns>The priority used as the queue key.</returns>
    public static double gFunc(int i, int j, LINK link)
    {
        // NOTE(review): w = 0.5 looks like the f(theta) term of the ROCK goodness
        // measure, hard-coded here — confirm against the intended theta.
        const double w = 0.5;
        double mi = 1.0 + 2.0 * w;

        double ni = link.setN[i];
        double nj = link.setN[j];
        double expected = Math.Pow(ni + nj, mi) - Math.Pow(ni, mi) - Math.Pow(nj, mi);
        return ((double)link.linkNum[i, j]) / expected;
    }

    /// <summary>Builds the local queue of node <paramref name="s"/>, holding every other original node keyed by <see cref="gFunc"/>.</summary>
    public static RockQueue build_local_heap(LINK link, int s)
    {
        RockQueue qs = new RockQueue(link.setN.Length);
        for (int i = 0; i < link.len; i++)
        {
            if (i == s)
            {
                continue;
            }
            qs.Insert(i, gFunc(i, s, link));
        }
        return qs;
    }

    /// <summary>Builds the global queue: one entry per original node, keyed by the best key of that node's local queue.</summary>
    public static RockQueue build_global_heap(LINK link, RockQueue[] q)
    {
        RockQueue gheap = new RockQueue(link.setN.Length);
        for (int i = 0; i < link.len; i++)
        {
            gheap.Insert(new RockType(i, TopKey(q[i])));
        }
        return gheap;
    }

    /// <summary>Number of entries in <paramref name="Q"/>.</summary>
    public static int size(RockQueue Q)
    {
        return Q.Size();
    }

    /// <summary>Removes the largest entry of <paramref name="Q"/> and returns its id.</summary>
    public static int extract_max(RockQueue Q)
    {
        return Q.ExtractMax().id;
    }

    /// <summary>Returns the id of the largest entry of <paramref name="qu"/> without removing it.</summary>
    public static int max(RockQueue qu)
    {
        return qu.GetMax().id;
    }

    /// <summary>Removes <paramref name="id"/> from <paramref name="queue"/>; a null queue is treated as "nothing removed".</summary>
    public static bool delete(RockQueue queue, int id)
    {
        if (queue == null)
        {
            return false;
        }
        return queue.Delete(id);
    }

    /// <summary>
    /// Allocates a new cluster id for the union of <paramref name="u"/> and
    /// <paramref name="v"/>, records its size and points both old ids at it.
    /// </summary>
    /// <returns>The id of the merged cluster.</returns>
    public static int merge(int u, int v, ref LINK link)
    {
        int w = link.newElement();
        link.setN[w] = link.setN[u] + link.setN[v];
        link.sets[u] = link.sets[v] = w;
        return w;
    }

    /// <summary>Returns, in ascending order, every id present in <c>q[u]</c> or <c>q[v]</c>.</summary>
    public static List<int> UnionElements(int u, int v, RockQueue[] q)
    {
        int len = q[u].Capacity;
        List<int> list = new List<int>(len);
        for (int i = 0; i < len; i++)
        {
            if (q[u].rt[i] != null || q[v].rt[i] != null)
            {
                list.Add(i);
            }
        }
        return list;
    }

    /// <summary>Releases a local queue by clearing the caller's reference.</summary>
    public static void deallocate(ref RockQueue queue)
    {
        queue = null;
    }

    /// <summary>Inserts (<paramref name="w"/>, <paramref name="gkey"/>) into a local queue; a null queue is ignored.</summary>
    public static bool insert(RockQueue queue, int w, double gkey)
    {
        if (queue == null)
        {
            return false;
        }
        return queue.Insert(w, gkey);
    }

    /// <summary>Inserts cluster <paramref name="w"/> into the global queue, keyed by the best key of its local queue.</summary>
    public static bool insert(RockQueue Q, int w, RockQueue qw)
    {
        return Q.Insert(w, TopKey(qw));
    }

    /// <summary>Refreshes the global-queue key of <paramref name="x"/> from its local queue; a null local queue is ignored.</summary>
    public static void update(RockQueue Q, int x, RockQueue qx)
    {
        if (qx == null)
        {
            return;
        }
        Q.Update(x, TopKey(qx));
    }

    // Best key of a local queue. An empty local queue (a single input point, or the
    // last cluster left when merging down to one) has no merge candidate, so it
    // gets the lowest possible key instead of failing on GetMax().
    private static double TopKey(RockQueue local)
    {
        return local.Size() > 0 ? local.GetMax().gkey : double.NegativeInfinity;
    }
}
/// <summary>
/// Shared bookkeeping for the ROCK routines: pairwise link counts, per-cluster
/// sizes and the parent pointers written when clusters are merged.
/// </summary>
public class LINK
{
    /// <summary>
    /// linkNum[i, j] is the number of nodes that both node i and node j point to.
    /// </summary>
    public int[,] linkNum;

    /// <summary>Number of original nodes (the <c>Length</c> constructor argument).</summary>
    public int len;

    /// <summary>Next id that <see cref="newElement"/> will hand out.</summary>
    public int now;

    /// <summary>Per-id size counter; every slot starts at 1.</summary>
    public int[] setN;

    /// <summary>Per-id parent pointer; every slot starts pointing at itself.</summary>
    public int[] sets;

    /// <summary>Hands out the next unused id and advances the counter.</summary>
    /// <returns>The id that was allocated.</returns>
    public int newElement()
    {
        int allocated = now;
        now = allocated + 1;
        return allocated;
    }

    /// <summary>
    /// Allocates storage for <c>Length + Length - k</c> ids and initialises every
    /// slot as a singleton that is its own parent.
    /// </summary>
    /// <param name="Length">Number of original nodes.</param>
    /// <param name="k">Target number of clusters.</param>
    public LINK(int Length, int k)
    {
        int capacity = Length + Length - k;

        linkNum = new int[capacity, capacity];
        setN = new int[capacity];
        sets = new int[capacity];

        int slot = 0;
        while (slot < capacity)
        {
            setN[slot] = 1;
            sets[slot] = slot;
            slot++;
        }

        // Ids below Length are the original nodes; new ids start right after them.
        len = Length;
        now = Length;
    }
}
/// <summary>
/// Driver for the ROCK clustering procedure built on <see cref="RockFunc"/>,
/// <see cref="RockQueue"/> and <see cref="LINK"/>.
/// </summary>
public class RockCluster
{
    LINK link;

    /// <summary>
    /// Repeatedly merges the two clusters with the highest merge priority until
    /// only <paramref name="k"/> clusters remain.
    /// </summary>
    /// <param name="k">Number of clusters to stop at; must satisfy 1 &lt;= k &lt;= len.</param>
    /// <param name="S">Adjacency matrix; <c>S[j, i]</c> is true when node j points to node i.</param>
    /// <param name="len">Number of nodes; <paramref name="S"/> must be at least len x len.</param>
    /// <returns>
    /// An array of length <paramref name="len"/> whose element i is the id of the
    /// cluster containing node i. Nodes that were never merged keep their own id;
    /// merged clusters have ids of <paramref name="len"/> or above.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="S"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="len"/> is negative or <paramref name="k"/> is outside [1, len].</exception>
    /// <exception cref="ArgumentException"><paramref name="S"/> is smaller than len x len.</exception>
    public int[] RockAlgorithm(int k, bool[,] S, int len)
    {
        if (S == null)
        {
            throw new ArgumentNullException("S");
        }
        if (len < 0)
        {
            throw new ArgumentOutOfRangeException("len", "len must not be negative.");
        }
        if (len == 0)
        {
            return new int[0];
        }
        if (k < 1 || k > len)
        {
            throw new ArgumentOutOfRangeException("k", "k must be between 1 and len.");
        }
        if (S.GetLength(0) < len || S.GetLength(1) < len)
        {
            throw new ArgumentException("S must be at least len x len.", "S");
        }

        // len original ids plus one new id for each of the (len - k) merges.
        int capacity = len + len - k;

        link = RockFunc.compute_linkNum(S, len, k);
        RockQueue[] q = new RockQueue[capacity];
        for (int s = 0; s < len; s++)
        {
            q[s] = RockFunc.build_local_heap(link, s);
        }
        RockQueue Q = RockFunc.build_global_heap(link, q);

        while (RockFunc.size(Q) > k)
        {
            int u = RockFunc.extract_max(Q);
            int v = RockFunc.max(q[u]);
            RockFunc.delete(Q, v);
            int w = RockFunc.merge(u, v, ref link);
            q[w] = new RockQueue(capacity);

            List<int> xset = RockFunc.UnionElements(u, v, q);
            foreach (int x in xset)
            {
                // u and v are listed in each other's local queue, so they show up
                // here too. They have already left Q and their queues are released
                // below, so there is nothing to maintain for them.
                if (Q.rt[x] == null)
                {
                    continue;
                }

                int links = link.linkNum[x, u] + link.linkNum[x, v];
                // Keep the matrix symmetric: later merges read linkNum[x, *] where x
                // may itself be a merged cluster, i.e. the row of today's w.
                link.linkNum[x, w] = links;
                link.linkNum[w, x] = links;

                RockFunc.delete(q[x], u);
                RockFunc.delete(q[x], v);

                double goodness = RockFunc.gFunc(x, w, link);
                RockFunc.insert(q[x], w, goodness);
                RockFunc.insert(q[w], x, goodness);
                RockFunc.update(Q, x, q[x]);
            }

            RockFunc.insert(Q, w, q[w]);
            RockFunc.deallocate(ref q[u]);
            RockFunc.deallocate(ref q[v]);
        }

        // Follow the parent pointers of every original node up to its final cluster id.
        int[] bloggerGroupIds = new int[len];
        for (int i = 0; i < len; i++)
        {
            int mid = i;
            while (mid != link.sets[mid])
            {
                mid = link.sets[mid];
            }
            bloggerGroupIds[i] = mid;
        }
        return bloggerGroupIds;
    }
}
}
// public static void showBloggerIds(int[] BloggerIds)
// {
// for (int i = 0; i < BloggerIds.Length; i++)
// Console.WriteLine("{0},{1}",i,BloggerIds[i]);
// }
// static void Main(string[] args)
// {
// RockCluster rc = new RockCluster();
// bool [,]S=new bool[5,5];
// S[0, 4] = true;
// S[1, 2] = true;
// S[2, 4] = true;
// S[3, 4] = true;
// S[2, 3] = true;
// S[0, 3] = true;
// S[4, 2] = true;
// int len=5;
// int []BloggerIds = rc.RockAlgorithm(2, S, len);
// showBloggerIds(BloggerIds);
//}