K-means 算法思想:
1.初始化聚类个数及中心点。人为给定。
2.划分数据到每个类。计算样本数据到各聚类中心的距离(欧氏距离或其他距离等),把每个样本划分到距离最近的类中。
3.重新计算类中心点。一般是求坐标平均值。
4.重复2、3步骤,直到聚类中心不再移动位置(或达到设定的最大迭代次数)。
opencv函数kmeans使用方法:
原型:double kmeans(InputArray data, int K, InputOutputArray bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray centers=noArray() )
data:[IN] 输入样本,一行是一个样本。元素类型为浮点型(CV_32F)。
K:[IN] 类别数
bestLabels:[OUT] 输出类别标签。与data有相等行数,1列,类型CV_32SC1
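criteria:[IN] 迭代终止条件(最大迭代次数和/或精度)。例如 TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0) 表示最多迭代10次,或聚类中心的移动量小于1.0时停止。
attempts:[IN] 使用不同初始划分重复执行算法的次数,返回其中紧密度(compactness)最好的一次结果。
flags:[IN] 初始中心的选取方式:KMEANS_RANDOM_CENTERS、KMEANS_PP_CENTERS 或 KMEANS_USE_INITIAL_LABELS。
centers:[OUT] 输出聚类中心,每行是一个中心。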
示例:
/**
 * @brief Demo: cluster the dark pixels of an image into K groups by position
 *        using cv::kmeans, then save a colour-coded visualisation.
 *
 * Steps:
 *  1. Inverse-threshold img at 230, so pixels <= 230 become foreground (255).
 *  2. Use the (x, y) coordinate of every foreground pixel as one sample.
 *  3. Run kmeans on the coordinates (k-means++ seeding, 3 attempts,
 *     stop after 100 iterations or when the centers move by less than 1.0).
 *  4. Paint each sample with its cluster colour and write "binary_img.bmp"
 *     and "kmeans.bmp" to the current working directory.
 *
 * @param img  Input image; must be 8-bit single-channel (CV_8UC1).
 * @param K    Number of clusters; must be > 0 and not exceed the number of
 *             foreground pixels.
 * @throws cv::Exception if a precondition above is violated or an OpenCV call fails.
 */
void test_kmeans(Mat& img, int K)
{
    // The byte-wise scan below and countNonZero both need 8-bit single-channel data;
    // any other depth would make the manual scan disagree with sample_count.
    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(K > 0);

    Mat binary_img;
    threshold(img, binary_img, 230, 255, THRESH_BINARY_INV);

    const int sample_count = countNonZero(binary_img);
    // kmeans cannot form K clusters out of fewer than K samples; checking here
    // also guarantees the sample vector is non-empty before its address is taken.
    CV_Assert(sample_count >= K);

    // Gather the coordinates of all foreground pixels in row-major order.
    vector<Point2f> samples_point;
    samples_point.reserve(sample_count);
    for (int row = 0; row < binary_img.rows; row++) {
        const uchar* pData = binary_img.ptr<uchar>(row);
        for (int col = 0; col < binary_img.cols; col++) {
            if (pData[col]) {
                samples_point.push_back(Point2f(static_cast<float>(col), static_cast<float>(row)));
            }
        }
    }

    // Header-only Mat over the vector's storage (no copy): each Point2f is two
    // consecutive floats, so the buffer is viewed as sample_count x 2 CV_32FC1.
    // samples_point must stay alive and unmodified while samples_mat is in use.
    Mat samples_mat(sample_count, 2, CV_32FC1, &samples_point[0]);

    Mat centers, labels;
    kmeans(samples_mat, K, labels,
           TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 100, 1.0),
           3, KMEANS_PP_CENTERS, centers);

    // Diagnostic output: shows that the labels matrix is CV_32SC1.
    cout << labels.depth() << ", " << labels.type() << endl;
    cout << CV_32SC1 << ", " << CV_32S << endl;

    // One pseudo-random colour per cluster; the fixed seed keeps colours reproducible.
    vector<Vec3b> colors(K);
    RNG rng(12345);
    for (int i = 0; i < K; i++) {
        // Channels are filled one statement at a time so the RNG draw order is fixed.
        colors[i][0] = saturate_cast<uchar>(rng.uniform(0, 255));
        colors[i][1] = saturate_cast<uchar>(rng.uniform(0, 255));
        colors[i][2] = saturate_cast<uchar>(rng.uniform(0, 255));
    }

    // Paint every sample pixel with the colour of the cluster it was assigned to.
    Mat drawimg(img.size(), CV_8UC3, Scalar(0, 0, 0));
    for (int i = 0; i < sample_count; i++) {
        const Point pixel(static_cast<int>(samples_point[i].x),
                          static_cast<int>(samples_point[i].y));
        drawimg.at<Vec3b>(pixel) = colors[labels.at<int>(i, 0)];
    }

    imwrite("binary_img.bmp", binary_img);
    imwrite("kmeans.bmp", drawimg);
}