using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
namespace Similarity
{
/// <summary>
/// K-means style clustering of integer feature vectors, using cosine similarity
/// (scaled to an integer percentage) to pick the closest center.
/// </summary>
class myClusterFun
{
    /// <summary>Hard upper bound on the number of clustering passes.</summary>
    private const int MaxIterations = 300;

    /// <summary>
    /// The loop stops once the average center similarity has repeated its previous
    /// value more than this many times (counted cumulatively, not consecutively).
    /// </summary>
    private const int StableIterationsRequired = 20;

    /// <summary>
    /// Entry point: clusters the rows of <paramref name="HashArr"/> into
    /// <paramref name="NumberOfCenter"/> groups.
    /// </summary>
    /// <param name="HashArr">Feature vectors, one per row; all rows must have the same length.</param>
    /// <param name="NumberOfCenter">Number of clusters to build; must be positive.</param>
    /// <returns>
    /// One array per cluster containing the row indices of <paramref name="HashArr"/>
    /// assigned to that cluster in the last pass. A cluster may be empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="HashArr"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="HashArr"/> is empty, or contains a null row or rows of differing length.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="NumberOfCenter"/> is not positive.</exception>
    public static int[][] CluEntran(int[][] HashArr, int NumberOfCenter)
    {
        if (HashArr == null)
        {
            throw new ArgumentNullException("HashArr");
        }
        if (HashArr.Length == 0 || HashArr[0] == null)
        {
            throw new ArgumentException("At least one non-null vector is required.", "HashArr");
        }
        if (NumberOfCenter <= 0)
        {
            throw new ArgumentOutOfRangeException("NumberOfCenter", "The number of clusters must be positive.");
        }

        int numberOfDoc = HashArr.Length;
        int dimension = HashArr[0].Length;
        for (int i = 0; i < numberOfDoc; i++)
        {
            if (HashArr[i] == null || HashArr[i].Length != dimension)
            {
                throw new ArgumentException("All vectors must be non-null and of equal length.", "HashArr");
            }
        }

        // Seed every center with a copy of one of the input vectors.
        int[][] currentCenters = new int[NumberOfCenter][];
        int[][] previousCenters = new int[NumberOfCenter][];
        int[] seeds = InitialFunc(NumberOfCenter, numberOfDoc);
        for (int i = 0; i < NumberOfCenter; i++)
        {
            currentCenters[i] = new int[dimension];
            previousCenters[i] = new int[dimension];
            Array.Copy(HashArr[seeds[i]], currentCenters[i], dimension);
        }

        int[][] clusters = null;
        int loopTimes = 0;
        int stableCount = 0;
        int lastAverageGap = 0;
        bool converged = false;

        // The body always runs at least once, so clusters is assigned before it is returned.
        while (!converged && loopTimes < MaxIterations)
        {
            // Snapshot the centers so the movement caused by this pass can be measured.
            for (int i = 0; i < NumberOfCenter; i++)
            {
                Array.Copy(currentCenters[i], previousCenters[i], dimension);
            }

            clusters = GatherToCenter(ref currentCenters, HashArr, NumberOfCenter);
            loopTimes++;

            // Average similarity between each center and its previous position.
            int[] centerGaps = JudgeConverge(currentCenters, previousCenters, NumberOfCenter);
            long gapSum = 0;
            foreach (int gap in centerGaps)
            {
                gapSum += gap;
            }
            int averageGap = (int)(gapSum / NumberOfCenter);

            if (averageGap == lastAverageGap)
            {
                stableCount++;
            }
            converged = stableCount > StableIterationsRequired;
            lastAverageGap = averageGap;
        }

        // Only report failure when the iteration budget ran out without convergence.
        if (!converged)
        {
            Console.WriteLine("Converge failed!!");
        }
        return clusters;
    }

    /// <summary>
    /// Chooses the indices of the vectors used as initial centers: evenly spaced
    /// positions (step = NumberOfDoc / NumberOfCenter) shifted by a random offset
    /// and wrapped modulo <paramref name="NumberOfDoc"/>.
    /// </summary>
    /// <param name="NumberOfCenter">Number of indices to produce; must be positive.</param>
    /// <param name="NumberOfDoc">Number of available vectors; must be positive.</param>
    /// <returns>
    /// <paramref name="NumberOfCenter"/> indices in the range [0, NumberOfDoc).
    /// Indices repeat when there are more centers than vectors.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">Either argument is not positive.</exception>
    public static int[] InitialFunc(int NumberOfCenter, int NumberOfDoc)
    {
        if (NumberOfCenter <= 0)
        {
            throw new ArgumentOutOfRangeException("NumberOfCenter", "The number of clusters must be positive.");
        }
        if (NumberOfDoc <= 0)
        {
            throw new ArgumentOutOfRangeException("NumberOfDoc", "The number of vectors must be positive.");
        }

        int[] result = new int[NumberOfCenter];
        int step = NumberOfDoc / NumberOfCenter;
        int offset = new Random().Next(NumberOfDoc) + 1;
        for (int i = 0; i < NumberOfCenter; i++)
        {
            // Computed in long so very large document counts cannot wrap around.
            result[i] = (int)(((long)step * i + offset) % NumberOfDoc);
        }
        return result;
    }

    /// <summary>
    /// Performs one clustering pass: assigns every vector to its most similar center,
    /// then overwrites each center, in place, with the integer mean of its members.
    /// </summary>
    /// <param name="SingleNewCenter">
    /// Current centers. The row arrays are updated in place with the new centers;
    /// the outer array reference itself is not reassigned.
    /// </param>
    /// <param name="HashArr">Feature vectors to assign.</param>
    /// <param name="NumberOfCenter">Number of centers in <paramref name="SingleNewCenter"/>.</param>
    /// <returns>One array per center holding the indices of the vectors assigned to it.</returns>
    public static int[][] GatherToCenter(ref int[][] SingleNewCenter, int[][] HashArr, int NumberOfCenter)
    {
        int numberOfDoc = HashArr.Length;
        int[] clusterSize = new int[NumberOfCenter];
        int[] assignment = new int[numberOfDoc];

        // Assignment step. The best candidate is reset for every vector so that a
        // vector with no positive similarity still goes to its own most similar
        // center (ties resolve to the lowest center index) instead of inheriting
        // the previous vector's cluster.
        for (int doc = 0; doc < numberOfDoc; doc++)
        {
            int bestCenter = 0;
            int bestSimilarity = int.MinValue;
            for (int c = 0; c < NumberOfCenter; c++)
            {
                int similarity = CosComp(HashArr[doc], SingleNewCenter[c]);
                if (similarity > bestSimilarity)
                {
                    bestSimilarity = similarity;
                    bestCenter = c;
                }
            }
            assignment[doc] = bestCenter;
            clusterSize[bestCenter]++;
        }

        // Materialize the member list of every cluster.
        int[][] clusters = new int[NumberOfCenter][];
        for (int c = 0; c < NumberOfCenter; c++)
        {
            clusters[c] = new int[clusterSize[c]];
        }
        int[] nextSlot = new int[NumberOfCenter];
        for (int doc = 0; doc < numberOfDoc; doc++)
        {
            int c = assignment[doc];
            clusters[c][nextSlot[c]++] = doc;
        }

        // Update step.
        for (int c = 0; c < NumberOfCenter; c++)
        {
            int[] members = clusters[c];
            if (members.Length == 0)
            {
                // An empty cluster keeps its previous center.
                continue;
            }

            int dimension = HashArr[0].Length;
            long[] sums = new long[dimension]; // long: a sum of many ints can exceed int range
            foreach (int doc in members)
            {
                for (int k = 0; k < dimension; k++)
                {
                    sums[k] += HashArr[doc][k];
                }
            }
            for (int k = 0; k < dimension; k++)
            {
                // Integer mean (truncated toward zero); always fits in int.
                SingleNewCenter[c][k] = (int)(sums[k] / members.Length);
            }
        }
        return clusters;
    }

    /// <summary>
    /// Measures how far each center moved by comparing it with its previous position.
    /// </summary>
    /// <param name="SingleNewCenter">Centers after the latest pass.</param>
    /// <param name="LastNewCenter">Centers before the latest pass.</param>
    /// <param name="NumberOfCenter">Number of centers to compare.</param>
    /// <returns>For each center, the <see cref="CosComp"/> similarity between new and old position.</returns>
    public static int[] JudgeConverge(int[][] SingleNewCenter, int[][] LastNewCenter, int NumberOfCenter)
    {
        int[] result = new int[NumberOfCenter];
        for (int i = 0; i < NumberOfCenter; i++)
        {
            result[i] = CosComp(SingleNewCenter[i], LastNewCenter[i]);
        }
        return result;
    }

    /// <summary>
    /// Cosine similarity of two vectors, multiplied by 100 and rounded down to an integer.
    /// </summary>
    /// <param name="HashArr1">First vector.</param>
    /// <param name="HashArr2">Second vector; must be at least as long as <paramref name="HashArr1"/>.</param>
    /// <returns>
    /// floor(100 * cos), or 0 when either vector has zero length (the cosine is
    /// undefined there and would otherwise yield NaN).
    /// </returns>
    public static int CosComp(int[] HashArr1, int[] HashArr2)
    {
        double norm1 = VecMod(HashArr1);
        double norm2 = VecMod(HashArr2);

        // Exact comparison is intended: the norm is exactly 0 only for an all-zero vector.
        if (norm1 == 0 || norm2 == 0)
        {
            return 0;
        }

        double cosine = VecDot(HashArr1, HashArr2) / (norm1 * norm2);
        return (int)Math.Floor(cosine * 100);
    }

    /// <summary>
    /// Dot product of two vectors, iterating over the length of <paramref name="a"/>.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector; must be at least as long as <paramref name="a"/>.</param>
    /// <returns>The sum of a[i] * b[i].</returns>
    public static double VecDot(int[] a, int[] b)
    {
        double sum = 0;
        for (int i = 0; i < a.Length; i++)
        {
            // Widen before multiplying so large components do not overflow int.
            sum += (double)a[i] * b[i];
        }
        return sum;
    }

    /// <summary>
    /// Euclidean length (L2 norm) of a vector.
    /// </summary>
    /// <param name="a">The vector.</param>
    /// <returns>The square root of the sum of squared components.</returns>
    public static double VecMod(int[] a)
    {
        double sum = 0;
        for (int i = 0; i < a.Length; i++)
        {
            // Widen before multiplying so large components do not overflow int.
            sum += (double)a[i] * a[i];
        }
        return Math.Sqrt(sum);
    }
}
}
// K-means clustering algorithm in C#
// Original author: 聚类算法 ("clustering algorithm")
// Original article: https://blog.csdn.net/new_shows/article/details/51234998
// This article was reposted from the web solely to share knowledge; if it infringes
// your rights, please contact the blogger to have it removed.