前些日子项目需要做一个地图聚类的功能, 一开始使用了 commons-math 的 DBSCAN 算法来实现, 但实测效果并不理想。后来同事 Judy 发来一个基于固定距离的聚类算法, 从 geo points 聚类的角度看, 该算法的实测效果较理想, 性能也比 DBSCAN、Max-Min Distance 等聚类算法更好。
该算法思路简洁有效, 不废话, 直接上代码。
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.Clusterer;
import org.apache.commons.math3.ml.distance.DistanceMeasure;
/**
 * Distance-based clustering.
 *
 * <p>Points are processed in the iteration order of the input collection. Each point joins the
 * first existing cluster (in creation order) whose center is strictly closer than {@code theta};
 * if no such cluster exists, the point starts a new cluster and becomes its center. This class
 * never moves a center after the cluster has been created, so the result depends on the order
 * of the input points.
 *
 * @author Judy.H, Nian.L
 * @param <T> type of the points to cluster
 */
public class WithinDistanceCluster<T extends Clusterable> extends Clusterer<T> {

    /**
     * Distance threshold: a point joins a cluster only when its distance to the cluster center is
     * strictly less than this value. Declaration left as in the original (package-visible,
     * non-final) so existing same-package code keeps compiling.
     */
    double theta;

    /**
     * Creates a clusterer whose distance measure delegates to {@code MathUtils.earthDistance}.
     *
     * @param Theta distance in meters.
     */
    // The anonymous DistanceMeasure below declares no serialVersionUID.
    // NOTE(review): presumably DistanceMeasure is Serializable, hence the suppression - confirm.
    @SuppressWarnings("serial")
    public WithinDistanceCluster(double Theta) {
        super(new DistanceMeasure() {
            @Override
            public double compute(double[] a, double[] b) throws DimensionMismatchException {
                // Latitude/longitude distance computation; details omitted here
                // (there is room for optimization).
                // NOTE(review): presumably returns meters, matching the unit of theta - confirm.
                return MathUtils.earthDistance(a, b);
            }
        });
        this.theta = Theta;
    }

    /**
     * Creates a clusterer that uses the supplied distance measure.
     *
     * @param distanceMeasure measure used to compare a point with a cluster center
     * @param Theta distance in meters.
     */
    public WithinDistanceCluster(DistanceMeasure distanceMeasure, double Theta) {
        super(distanceMeasure);
        this.theta = Theta;
    }

    /**
     * Groups the given points into clusters in a single pass.
     *
     * @param points points to cluster, visited in the collection's iteration order
     * @return the clusters in creation order; empty when {@code points} is empty
     * @throws MathIllegalArgumentException declared by the overridden method; not thrown
     *     explicitly here, but may propagate from the distance measure
     * @throws ConvergenceException declared by the overridden method; not thrown explicitly here
     */
    @Override
    public List<? extends Cluster<T>> cluster(Collection<T> points)
            throws MathIllegalArgumentException, ConvergenceException {
        List<CentroidCluster<T>> clusters = new ArrayList<>();
        // Iterate the collection directly: no array copy and no unchecked (T[]) cast needed.
        for (T point : points) {
            CentroidCluster<T> target = firstClusterWithinTheta(clusters, point);
            if (target == null) {
                // No center is close enough: this point seeds a new cluster and is its center.
                target = new CentroidCluster<>(point);
                clusters.add(target);
            }
            target.addPoint(point);
        }
        return clusters;
    }

    /**
     * Returns the first cluster, in list order, whose center is strictly closer to {@code point}
     * than {@code theta}, or {@code null} when there is none.
     */
    private CentroidCluster<T> firstClusterWithinTheta(List<CentroidCluster<T>> clusters, T point) {
        for (CentroidCluster<T> candidate : clusters) {
            if (distance(point, candidate.getCenter()) < theta) {
                return candidate;
            }
        }
        return null;
    }
}