Applications of Clustering
Last week, while browsing the internet, I happened to learn that the final step of radar signal processing is plot extraction, and that it uses a clustering algorithm.
So I searched for clustering algorithms, and there are a lot of them! I noticed k-means, which I had already come across alongside KNN classification in my earlier machine learning course. In Python it only takes a few lines of code, so I decided to work through it in C++ and pretend I'm developing a radar. There is no code optimization here, just the basic functionality.
The Idea of K-means
The idea is simple: a point belongs to whichever cluster it is closest to, and this rule is used both to group the data and to classify new points.
- Initialize K clusters and pick an initial center point for each one.
- For every point, compute its distance to each cluster center and assign it to the nearest cluster.
- Update each cluster's center to the mean of the points assigned to it.
- Repeat steps 2 and 3 until the cluster centers no longer move (a minimal sketch of this loop follows the list).
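Before the full implementation, here is a minimal, self-contained sketch of that loop on a handful of made-up 2D points. It is not the class developed below, just an illustration of steps 1 to 4; the struct name Pt and the sample coordinates are invented for this example.

#include <cmath>
#include <iostream>
#include <vector>

struct Pt { float x, y; };

int main()
{
    // toy data: two obvious groups near (0,0) and (5,5)
    std::vector<Pt> pts = {{0,0},{0,1},{1,0},{5,5},{6,5},{5,6}};
    const int K = 2;
    std::vector<Pt> centers(pts.begin(), pts.begin() + K); // step 1: pick initial centers
    std::vector<int> label(pts.size(), 0);
    for (int iter = 0; iter < 100; ++iter)
    {
        // step 2: assign every point to its nearest center
        for (size_t p = 0; p < pts.size(); ++p)
        {
            float best = 1e30f;
            for (int k = 0; k < K; ++k)
            {
                float d = std::hypot(pts[p].x - centers[k].x, pts[p].y - centers[k].y);
                if (d < best) { best = d; label[p] = k; }
            }
        }
        // step 3: move each center to the mean of its assigned points
        bool moved = false;
        for (int k = 0; k < K; ++k)
        {
            float sx = 0, sy = 0; int n = 0;
            for (size_t p = 0; p < pts.size(); ++p)
                if (label[p] == k) { sx += pts[p].x; sy += pts[p].y; ++n; }
            if (n == 0) continue; // empty cluster: keep its old center
            Pt c = { sx / n, sy / n };
            if (std::fabs(c.x - centers[k].x) > 1e-3f || std::fabs(c.y - centers[k].y) > 1e-3f)
                moved = true;
            centers[k] = c;
        }
        if (!moved) break; // step 4: stop once the centers no longer move
    }
    for (int k = 0; k < K; ++k)
        std::cout << "center " << k << ": " << centers[k].x << "," << centers[k].y << "\n";
    return 0;
}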
I finished the code yesterday and tested and debugged it today. I fixed a few small bugs and one big one: after the clustering loop finished, one of the clusters somehow contained zero points, even though it should at least contain the point its center came from. Good old debugging eventually sorted it out.
Key Parameters
The initial centers are chosen at random. If the initial choice is poor, the number of center-update iterations goes up.
In the code below there are 24 input points, the number of clusters is set to 4, and there are 2 test points.
Feel free to change these configurable parameters to suit your own application; the relevant lines are excerpted right below.
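For reference, these are the places to edit (taken from KNN.hpp and main.cpp below): the two macros control how many points are read and tested, and the constructor argument is the number of clusters K.

#define OBJ_LEN 24   // number of training points read from in.txt
#define TEST_LEN 2   // number of test points classified afterwards
Kmeans mytest(4);    // K, the number of clusters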
C++ Code
KNN.hpp
/* Author: AppleCai Date 2019/04/28 */
#pragma once
#include<vector>
#include<iostream>
#include<cmath>
#include<cstdlib> /* for rand() used in dataInit */
using namespace std;
#define OBJ_LEN 24
#define TEST_LEN 2
class Kmeans
{
public:
typedef struct mypoint
{
float x;
float y;
int tag;
}st_mypoint;
enum my_enum
{
NO=0,
YES
};
Kmeans(int KgNum);
void dataInit(st_mypoint input[]);
void updateCenter(st_mypoint point[]); //set Kcenter;
float calDistance(st_mypoint p1,st_mypoint p2); //Euclidean distance
void Cluster(st_mypoint point[]); //set the Kgrouplist;
void KNN(st_mypoint input[]);
bool checkEnd(vector<st_mypoint> p1group,vector<st_mypoint> p2group);
void KNNtest(st_mypoint input[]);
private:
int KgroupNum; //the number of cluster
vector<st_mypoint> Kcenter; //the center of each cluster
vector<vector<int>> Kgrouplist; //the point to the related group
};
KNN.cpp
/* Author: AppleCai Date 2019/04/28 */
#include "KNN.hpp"
Kmeans::Kmeans(int KgNum)
{
KgroupNum = KgNum;
}
/* init the K group and select the group center */
void Kmeans::dataInit(st_mypoint input[])
{
int i,N;
for(i=0;i<KgroupNum;i++)
{
N = rand() % OBJ_LEN; /* pick a random input point as the initial center (rand() is unseeded here, and duplicate picks are possible) */
Kcenter.push_back(input[N]);
cout<<"Kcenter="<<Kcenter[i].x<<" "<<Kcenter[i].y<<endl;
}
for(i=0;i<KgroupNum;i++) /* reserve an empty index list per cluster; without this, indexing Kgrouplist later fails at run time */
{
vector<int> temp;
Kgrouplist.push_back(temp);
}
}
/*Calculate average value,update center*/
void Kmeans::updateCenter(st_mypoint point[])
{
int i=0;
int j=0;
for(i=0;i<KgroupNum;i++)
{
st_mypoint sum={0,0};
if(Kgrouplist.at(i).empty()) /* guard: an empty cluster would divide by zero below, so keep its previous center */
{
continue;
}
for(j=0;j<(int)Kgrouplist.at(i).size();j++)
{
point[Kgrouplist[i][j]].tag = i; /* tag each point with its cluster index */
// for debug
// cout<<"Kgrouplist[i][j]"<<point[Kgrouplist[i][j]].x<<point[Kgrouplist[i][j]].y<<"size"<<Kgrouplist.at(i).size()<<endl;
sum.x=(sum.x+point[(Kgrouplist[i][j])].x);
sum.y=(sum.y+point[(Kgrouplist[i][j])].y);
}
sum.x = sum.x/Kgrouplist.at(i).size();
sum.y = sum.y/Kgrouplist.at(i).size();
Kcenter[i]=sum;
Kcenter[i].tag = i; /* tag the center as well */
cout<<"updateCenter=["<<i<<"]="<<sum.x<<" "<<sum.y<<" "<<Kcenter[i].tag<<endl;
}
}
/*Euclidean distance*/
float Kmeans::calDistance(st_mypoint p1,st_mypoint p2)
{
float dis=0;
dis=sqrt(pow((p1.x-p2.x),2)
+pow((p1.y-p2.y),2)
);
return dis;
}
/* Cluster */
void Kmeans::Cluster(st_mypoint point[])
{
float mindistance;
float curdistance;
int index=0;
int i,j;
//clear the previous assignment first, otherwise Kgrouplist keeps growing on every iteration
for(i=0;i<KgroupNum;i++)
{
Kgrouplist.at(i).clear();
}
for(j=0;j<OBJ_LEN;j++)
{
/* start by assuming the first center is the nearest one */
mindistance= calDistance(Kcenter[0],point[j]);
index=0; /* index of the currently nearest cluster */
for(i=1;i<KgroupNum;i++)
{
curdistance = calDistance(Kcenter[i],point[j]);
if(curdistance<mindistance)
{
mindistance = curdistance;
index=i;
}
}
/* put the point to the minimal group list */
Kgrouplist.at(index).push_back(j);
}
/* debug output, left commented out */
// for(i=0;i<KgroupNum;i++)
// {
// cout<<"Size of grouplist="<<Kgrouplist[i].size()<<endl;
// for(j=0;j<Kgrouplist[i].size();j++)
// {
// cout<<"Kgrouplist["<<i<<"]["<<j<<"]="<<Kgrouplist[i][j]<<endl;
// }
// cout<<"next group:"<<endl;
// }
}
/* convergence check: compare the previous centers with the updated ones */
bool Kmeans::checkEnd(vector<st_mypoint> p1group,vector<st_mypoint> p2group)
{
bool ret = YES;
int i;
for(i=0;i<KgroupNum;i++)
{ /* a center is treated as unchanged if it moved less than 0.001 in both x and y */
if(fabs(p1group[i].x - p2group[i].x)>0.001||
fabs(p1group[i].y - p2group[i].y)>0.001
)
{
ret = NO;
}
}
return ret;
}
/* k-means driver: cluster, update centers, repeat until the centers stop moving (capped at 100 iterations) */
void Kmeans::KNN(st_mypoint input[])
{
bool flag=NO;
int cnt=0;
int i;
dataInit(input);
vector<st_mypoint> preKcenter(KgroupNum);
for(i=0;i<KgroupNum;i++)
{
preKcenter[i].x=0;
preKcenter[i].y=0;
}
while((NO == flag)&&(cnt<100))
{
cnt++;
Cluster(input);
updateCenter(input);
flag = checkEnd(preKcenter,Kcenter);
copy(Kcenter.begin(),Kcenter.end(),preKcenter.begin());
}
cout<<"cnt="<<cnt<<endl;
for(i=0;i<KgroupNum;i++)
{
cout<<"Kcenter["<<i<<"]="<<Kcenter[i].x<<" "<<Kcenter[i].y<<endl;
}
}
/* classify each test point by the nearest learned cluster center */
void Kmeans::KNNtest(st_mypoint input[])
{
int i,j;
float mindistance,curdistance;
for(j=0;j<TEST_LEN;j++)
{
/* start by assuming the first center is the nearest one */
mindistance= calDistance(Kcenter[0],input[j]);
input[j].tag = 0; /* initial tag is cluster 0 */
for(i=1;i<KgroupNum;i++)
{
curdistance = calDistance(Kcenter[i],input[j]);
if(curdistance<mindistance)
{
mindistance = curdistance;
input[j].tag = i; /* if found the smaller distance,update tag */
cout<<"input["<<i<<"].tag="<<i<<endl;
}
}
}
// for debug
// for(j=0;j<TEST_LEN;j++)
// {
// cout<<"The result of TestData["<<j<<"] is "<<input[j].tag<<endl;
// }
}
main.cpp
/* Author: AppleCai Date 2019/04/28 */
#include<iostream>
#include<fstream>
#include<vector>
#include"KNN.hpp"
using namespace std;
int main(void)
{
Kmeans::st_mypoint TrainData[OBJ_LEN];
Kmeans::st_mypoint TestData[TEST_LEN];
int i;
ifstream fin("in.txt");
ofstream fout("out.txt");
for(i=0;i<OBJ_LEN;i++) /* read the training points */
{
fin>>TrainData[i].x>>TrainData[i].y;
cout<<TrainData[i].x<<","<<TrainData[i].y<<endl;
}
fin.close();
Kmeans mytest(4); //number of clusters K set to 4
mytest.KNN(TrainData);
TestData[0].x = 6;
TestData[0].y = 6;
TestData[1].x = 0;
TestData[1].y = 1;
mytest.KNNtest(TestData);
for(i=0;i<TEST_LEN;i++)
{
fout<<"The result of TestData is:"<<TestData[i].x<<" "<<TestData[i].y<<" "<<TestData[i].tag<<endl;
}
return 0;
}
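To build and run, one option is g++ (any C++11 compiler should work); in.txt, containing the 24 whitespace-separated x y pairs, must be in the working directory:

g++ -std=c++11 main.cpp KNN.cpp -o kmeans
./kmeans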