用户有个开关,可以记录用户每次开关灯的状态,并且能够记录开关灯前后的光敏值。一般用户每天开灯时间集中在晚上,关灯时间也基本集中在晚上,但是有一些异常值,比如起夜上厕所开灯、上完厕所再关灯。获取用户开关灯的光敏值若干组,用K-means算法聚类出用户开关灯的阈值。
以开灯的数据为例:
序号 | 开灯前 |
1 | 500 |
2 | 550 |
3 | 531 |
4 | 460 |
5 | 580 |
6 | 100 |
7 | 30 |
8 | 560 |
9 | 40 |
10 | 900 |
11 | 400 |
使用K-means算法将数据分为3类,样本数量最多的那一类的均值(聚类中心)即为阈值:
程序如下:
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
// One-dimensional K-means clustering over integer samples (for example the
// light-sensor readings captured just before a switch is toggled). The mean
// of the most populated cluster is reported as the result.
class mL
{
    typedef std::vector<int>::size_type Index;

public:
    // Clusters `sample` into `k` groups and prints the mean of the group that
    // holds the most samples, followed by a newline, to std::cout.
    //
    // sample: values to cluster; the vector is only read.
    // k:      requested number of clusters. A value larger than sample.size()
    //         is clamped to sample.size().
    //
    // Nothing is printed when `sample` is empty or `k` is not positive.
    // The first `k` samples seed the means. Means are integers obtained by
    // truncating division. When several clusters share the largest size, the
    // one with the lowest index is reported.
    void kmeans(std::vector<int> &sample, int k)
    {
        const Index sampleCount = sample.size();
        if (sampleCount == 0 || k <= 0)
            return;

        // More clusters than samples cannot be seeded from the samples.
        Index clusterCount = static_cast<Index>(k);
        if (clusterCount > sampleCount)
            clusterCount = sampleCount;

        // Seed the means with the first samples.
        std::vector<int> meanValue(clusterCount, 0);
        for (Index c = 0; c < clusterCount; ++c)
            meanValue[c] = sample[c];

        std::vector<Index> memberCount(clusterCount, 0);

        // Safeguard: integer-truncated means are not guaranteed to reach a
        // fixed point, so the refinement is bounded instead of looping forever.
        const int maxIterations = 1000;
        for (int iteration = 0; iteration < maxIterations; ++iteration)
        {
            // Only the running sum and count of each cluster are needed to
            // recompute its mean, so the members themselves are not stored.
            std::vector<long long> memberSum(clusterCount, 0);
            memberCount.assign(clusterCount, 0);
            for (Index s = 0; s < sampleCount; ++s)
            {
                const Index nearest = nearestCluster(sample[s], meanValue);
                memberSum[nearest] += sample[s];
                ++memberCount[nearest];
            }

            // Recompute the means and note whether any of them moved.
            bool changed = false;
            for (Index c = 0; c < clusterCount; ++c)
            {
                // An empty cluster keeps its previous mean; dividing by its
                // size would be a division by zero.
                if (memberCount[c] == 0)
                    continue;

                const int updated = static_cast<int>(
                    memberSum[c] / static_cast<long long>(memberCount[c]));
                if (updated != meanValue[c])
                {
                    meanValue[c] = updated;
                    changed = true;
                }
            }

            if (!changed)
                break;
        }

        // Report the mean of the most populated cluster.
        Index largest = 0;
        for (Index c = 1; c < clusterCount; ++c)
        {
            if (memberCount[c] > memberCount[largest])
                largest = c;
        }
        std::cout << meanValue[largest] << std::endl;
    }

private:
    // Returns the index of the mean closest to `value`; ties go to the lowest
    // index. `means` must not be empty.
    static Index nearestCluster(int value, const std::vector<int> &means)
    {
        Index best = 0;
        long long bestDistance = absoluteDifference(value, means[0]);
        for (Index c = 1; c < means.size(); ++c)
        {
            const long long distance = absoluteDifference(value, means[c]);
            if (distance < bestDistance)
            {
                bestDistance = distance;
                best = c;
            }
        }
        return best;
    }

    // Returns |a - b|, computed in a wider type so the subtraction cannot
    // overflow.
    static long long absoluteDifference(int a, int b)
    {
        const long long difference =
            static_cast<long long>(a) - static_cast<long long>(b);
        return difference < 0 ? -difference : difference;
    }
};
int main()
{
int arr[] = {500,550,531,460,580,100,30,560,40,900,400};
vector<int> lightValue(arr,arr + 10);
mL *p = new mL;
p->kmeans(lightValue,3);
delete p;
}