K-means算法实战-一维数据的聚类

用户有个开关,可以记录用户每次开关灯的状态,并且能够记录开关灯前后的光敏值。一般用户每天开灯时间集中在晚上,关灯时间也基本集中在晚上,但是有一些异常值,比如起夜上厕所开灯、上完厕所再关灯。获取用户开关灯的光敏值若干组,用K-means算法聚类出用户开关灯的阈值。


以开灯的数据为例:

序号 | 开灯前
1 | 500
2 | 550
3 | 531
4 | 460
5 | 580
6 | 100
7 | 30
8 | 560
9 | 40
10 | 900
11 | 400

使用K-means算法将数据分为3类,样本数量最多的那一类的均值(聚类中心)即为阈值:

程序如下:

#include <iostream>
#include <vector>
#include <cmath>
using namespace std;

// Minimal one-dimensional K-means helper.
class mL
{
public:

	// Clusters the integers in `sample` into `k` groups and prints the mean of
	// the group that holds the most samples, followed by std::endl.
	//
	// sample: the values to cluster; it is only read, never modified.
	// k:      requested number of clusters. Values larger than the number of
	//         samples are clamped to the sample count, because the means are
	//         seeded from the first k samples.
	//
	// Nothing is printed when `sample` is empty or `k` is not positive.
	// Means are integers: each update uses truncating integer division.
	void kmeans(std::vector<int> &sample, int k)
	{
		const int len = static_cast<int>(sample.size());
		if (len == 0 || k <= 0)
			return;
		if (k > len)
			k = len;

		// Seed the means with the first k samples.
		std::vector<int> meanValue(sample.begin(), sample.begin() + k);
		// Number of samples assigned to each cluster in the latest pass.
		std::vector<int> numC(k, 0);

		// Safety net: the loop normally stops as soon as no mean changes, but
		// with truncated integer means that is not guaranteed here, so the
		// number of passes is bounded.
		const int kMaxIterations = 1000;
		for (int iter = 0; iter < kMaxIterations; ++iter)
		{
			// Only the running sum and count per cluster are needed to
			// recompute a mean, so the members themselves are not stored.
			std::vector<long long> sumC(k, 0);
			numC.assign(k, 0);

			// Assign every sample to its nearest mean; on a tie the cluster
			// with the lowest index wins because the comparison is strict.
			for (int i = 0; i < len; ++i)
			{
				int nearest = 0;
				long long minDis = absDiff(sample[i], meanValue[0]);
				for (int j = 1; j < k; ++j)
				{
					const long long dis = absDiff(sample[i], meanValue[j]);
					if (dis < minDis)
					{
						minDis = dis;
						nearest = j;
					}
				}
				sumC[nearest] += sample[i];
				++numC[nearest];
			}

			// Recompute the means and note whether any of them moved.
			bool changed = false;
			for (int i = 0; i < k; ++i)
			{
				// An empty cluster keeps its previous mean; dividing by its
				// zero count would be undefined.
				if (numC[i] == 0)
					continue;
				const int updated = static_cast<int>(sumC[i] / numC[i]);
				if (updated != meanValue[i])
				{
					meanValue[i] = updated;
					changed = true;
				}
			}
			if (!changed)
				break;
		}

		// Report the mean of the most populated cluster; the first such
		// cluster wins when several share the maximum count.
		int maxNumIndex = 0;
		for (int i = 1; i < k; ++i)
		{
			if (numC[i] > numC[maxNumIndex])
				maxNumIndex = i;
		}
		std::cout << meanValue[maxNumIndex] << std::endl;
	}

private:

	// Absolute difference of two ints, computed in a wider type so the
	// subtraction cannot overflow.
	static long long absDiff(int a, int b)
	{
		const long long diff = static_cast<long long>(a) - b;
		return diff < 0 ? -diff : diff;
	}

};

int main()
{
	int arr[] = {500,550,531,460,580,100,30,560,40,900,400};
	vector<int> lightValue(arr,arr + 10);
	mL *p = new mL;
	p->kmeans(lightValue,3);
	delete p;
	
}

    原文作者:聚类算法
    原文地址: https://blog.csdn.net/qq_16583687/article/details/76502210
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞