K均值聚类算法在OpenCV中通过kmeans()函数实现。通俗地讲,K均值就是:先从一堆样本中随机挑出几个(比如3个)作为初始代表;然后把样本逐个与这些挑出来的代表比较,比较后按接近程度排序,谁和哪个代表最接近,就把它划到那个类里——比如样本A与挑1、挑2、挑3相比,和挑2最接近,就把样本A划到挑2所在的类。当然这还没完:还得为划分好的3个类群各自重新确定一个类群代表——“均值”,然后用这3个新的“均值”再次与样本比较、重新划分;如此反复,直到“均值”不再变化,迭代终止,聚类完成。
用数学解释如下:
当然,上面的图是从别处截取的;具体如何用算法实现、如何用数学表达,感兴趣的同学可以查阅专业资料。K均值聚类算法用于简单的图像处理(比如简单的图像分割)是可以的;主要问题在于,K均值若要找出所有可能的分类再从中找到最优解,花费的时间就不是一点点了。此外,即使给它一组没有意义的随机样本,K均值也照样能聚类,只是划分的结果并不一定是你想要的——划分规则不同,划分的结果也就不同。
下面是两个简单实例:
1、产生随机数然后用k均值聚类;
/////////////////////////////////////////////
// 1. k_means: K-means clustering demo (random 2-D points)
/////////////////////////////////////////////
// Title of the window that shows the generated points before clustering.
#define WINDOW_1 "Before cluster"
// Title of the window that shows the points coloured by cluster label.
#define WINDOW_2 "After cluster"
// 500x500 3-channel 8-bit canvas on which the clustered points are drawn.
Mat dstIamge(500, 500, CV_8UC3);
// Canvas used for the "before" view. It is copy-constructed WITHOUT clone();
// NOTE(review): a cv::Mat copy presumably shares its pixel buffer with
// dstIamge, so clearing/drawing on one also affects the other -- confirm
// this aliasing is intended before re-enabling .clone().
Mat A_dstIamge = dstIamge/*.clone()*/;
RNG rng(12345); // random number generator with a fixed seed
// Number of clusters chosen for the current run (written by cluster()).
int clusterCount;
// Number of sample points chosen for the current run (written by cluster()).
int sampleCount;
// Despite the MIN_ prefix these are used as UPPER limits: cluster() draws
// sampleCount from [1, MIN_SAMPLECOUNT] and clusterCount from
// [2, MIN_SLUSTERCOUNTS], and Ml_Kmeans() binds both to trackbars.
int MIN_SAMPLECOUNT = 400, MIN_SLUSTERCOUNTS = 5;
void cluster(int, void*)
{
Scalar colorTab[] = //最多显示7类有颜色的,所以最多也就给7个颜色
{
Scalar(0, 0, 255),
Scalar(0, 255, 0),
Scalar(0, 255, 255),
Scalar(255, 0, 0),
Scalar(255, 255, 0),
Scalar(255, 0, 255),
Scalar(255, 255, 255),
Scalar(200, 200, 255),
};
clusterCount = rng.uniform(2, MIN_SLUSTERCOUNTS + 1);//产生之间的整数个类别!
sampleCount = rng.uniform(1, MIN_SAMPLECOUNT + 1);//产生1到1001个整数样本数,也就是一千个样本
Mat points(sampleCount, 1, CV_32FC2), labels; //产生的样本数,实际上为2通道的列向量,元素类型为Point2f
clusterCount = MIN(clusterCount, sampleCount);
Mat centers(clusterCount, 1, points.type()); //用来存储聚类后的中心点
/* generate random sample from multigaussian distribution */
for (int k = 0; k < clusterCount; k++) // Generate random points
{
Point center;// Random point coordinate
center.x = rng.uniform(0, dstIamge.cols);
center.y = rng.uniform(0, dstIamge.rows);
Mat pointChunk = points.rowRange(k*sampleCount / clusterCount,
k == clusterCount - 1 ? sampleCount : (k + 1)*sampleCount / clusterCount); //最后一个类的样本数不一定是平分的,
//剩下的一份都给最后一类
// Each of the classes is the same variance, but the mean is different.
rng.fill(pointChunk, CV_RAND_NORMAL, Scalar(center.x, center.y),//the mean
Scalar(dstIamge.cols*0.05, dstIamge.rows*0.05)); //the same variance
}
randShuffle(points, 1, &rng); //因为要聚类,所以先随机打乱points里面的点,注意points和pointChunk是共用数据的。
dstIamge = Scalar::all(0);
for (int i = 0; i < sampleCount; i++)
{
Point p = points.at<Point2f>(i);// Coordinates of corresponding points
circle(A_dstIamge, p, 1, Scalar::all(255), CV_FILLED, CV_AA);
}
imshow(WINDOW_1, A_dstIamge);
kmeans(points, clusterCount, labels,//labels表示每一个样本的类的标签,是一个整数,从0开始的索引整数,是簇数.
TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0),//用最大迭代次数或者精度作为迭代条件,看哪个条件先满足
3, //聚类3次,取结果最好的那次,
KMEANS_PP_CENTERS,//则表示为随机选取初始化中心点,聚类的初始化采用PP特定的随机算法。
centers); //参数centers表示的是聚类后的中心点存放矩阵。
// Traverse each point
for (int i = 0; i < sampleCount; i++)
{
int clusterIdx = labels.at<int>(i);// A label has been completed by clustering
Point p = points.at<Point2f>(i);// Coordinates of corresponding points
circle(dstIamge, p, 1, colorTab[clusterIdx], CV_FILLED, CV_AA);
}
imshow(WINDOW_2, dstIamge);
}
// 39. k_means: interactive K-means demo on random points.
//
// Creates the "before" window with two trackbars bound to MIN_SAMPLECOUNT
// (0..1000) and MIN_SLUSTERCOUNTS (0..10); moving either one invokes
// cluster(). A new random data set is generated and clustered after every
// key press until Esc, 'q' or 'Q' is pressed.
void Ml_Kmeans()//39.k_means K均值聚类
{
    // One-time UI setup: nothing here depends on the loop, so it is done once
    // instead of on every key press.
    namedWindow(WINDOW_1, WINDOW_AUTOSIZE);
    createTrackbar("samleCounts: ", WINDOW_1, &MIN_SAMPLECOUNT, 1000, cluster);
    createTrackbar("clusterCounts: ", WINDOW_1, &MIN_SLUSTERCOUNTS, 10, cluster);

    for (;;)
    {
        // Generate, cluster and display one data set per iteration.
        cluster(0, 0);

        const char key = static_cast<char>(waitKey()); // block until a key is pressed
        if (key == 27 || key == 'q' || key == 'Q')
            break;
    }
}
运行结果如下:
2、对图像进行K均值聚类:
///////////////////////////////////
//k均值主要是一种聚类算法,简单说就是将一个样本里的内容进行分类处理;
//在图像处理时候经常要主动定义和查找特征,比如阈值分割什么的,但是
//k均值不用定义特征,而是把特征相似的分类了;
//所以我们不用关心特征是什么,因为k均值已经区分特征;但是说来很理想,
//事实上分类出来或者分割出来的并不一定是理想中的样子
////////////////////////////////////
void Ml_Kmeans2()//39.k_means K均值聚类2
{
Mat src = imread("D:/ImageTest/test2.jpg");
imshow("input", src);
int width = src.cols;
int height = src.rows;
int dims = src.channels();
// 初始化定义
int sampleCount = width*height;
int clusterCount = 4;
Mat points(sampleCount, dims, CV_32F, Scalar(10));
Mat labels;
Mat centers(clusterCount, 1, points.type());
// 图像RGB到数据集转换
int index = 0;
for (int row = 0; row < height; row++) {
for (int col = 0; col < width; col++) {
index = row*width + col;
Vec3b rgb = src.at<Vec3b>(row, col);
points.at<float>(index, 0) = static_cast<int>(rgb[0]);
points.at<float>(index, 1) = static_cast<int>(rgb[1]);
points.at<float>(index, 2) = static_cast<int>(rgb[2]);
}
}
// 运行K-Means数据分类
TermCriteria criteria = TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0);
kmeans(points, clusterCount, labels, criteria, 3, KMEANS_PP_CENTERS, centers);
// 显示图像分割结果
Mat result = Mat::zeros(src.size(), CV_8UC3);
for (int row = 0; row < height; row++)
{
for (int col = 0; col < width; col++)
{
index = row*width + col;
int label = labels.at<int>(index, 0);
if (label == 1)
{
result.at<Vec3b>(row, col)[0] = 255;
result.at<Vec3b>(row, col)[1] = 0;
result.at<Vec3b>(row, col)[2] = 0;
}
else if (label == 2)
{
result.at<Vec3b>(row, col)[0] = 0;
result.at<Vec3b>(row, col)[1] = 255;
result.at<Vec3b>(row, col)[2] = 0;
}
else if (label == 3)
{
result.at<Vec3b>(row, col)[0] = 0;
result.at<Vec3b>(row, col)[1] = 0;
result.at<Vec3b>(row, col)[2] = 255;
}
else if (label == 0)
{
result.at<Vec3b>(row, col)[0] = 0;
result.at<Vec3b>(row, col)[1] = 255;
result.at<Vec3b>(row, col)[2] = 255;
}
}
}
imshow("kmeans-demo", result);
waitKey(0);
}
运行结果: