基于距离的地图聚类算法, Java实现

前些日子项目需要做一个地图聚类的功能, 一开始使用了 commons-math 的 DBSCAN 算法来实现。但实测效果并不理想, 后来同事 Judy 发来一个基于固定距离的聚类算法。从 geo points 聚类的角度看, 该算法的实测效果较理想, 性能也比 DBSCAN、Max-Min Distance 等聚类算法更好。

该算法思路简洁有效, 不废话, 直接上代码。


import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.Clusterer;
import org.apache.commons.math3.ml.distance.DistanceMeasure;

/**
 * Distance-threshold clusterer.
 *
 * <p>Points are processed one at a time in the iteration order of the input
 * collection. Each point joins the first existing cluster whose center lies
 * strictly closer than {@code theta}; if there is none, the point starts a new
 * cluster and becomes that cluster's center. A center is never moved after its
 * cluster is created, so the result depends on the order of the input.
 *
 * @author Judy.H, Nian.L
 * @param <T> type of the points to cluster
 */
public class WithinDistanceCluster<T extends Clusterable> extends Clusterer<T> {
    /** Distance threshold; a point joins a cluster only when its distance to the center is below this value. */
    double theta;

    /**
     * Creates a clusterer that measures distance with {@code MathUtils.earthDistance}.
     *
     * @param theta distance threshold, in meters
     */
    @SuppressWarnings("serial") // the anonymous DistanceMeasure is Serializable but declares no serialVersionUID
    public WithinDistanceCluster(double theta) {
        this(new DistanceMeasure() {

            @Override
            public double compute(double[] a, double[] b) throws DimensionMismatchException {
                // Latitude/longitude distance computation omitted in the original article
                // (the author notes there is room for optimization here).
                // NOTE(review): MathUtils is not imported in this file; presumably a
                // project-local helper in the same package - confirm.
                return MathUtils.earthDistance(a, b);
            }
        }, theta);
    }

    /**
     * Creates a clusterer that uses the supplied distance measure.
     *
     * @param distanceMeasure measure used to compare a point with a cluster center
     * @param theta distance threshold, expressed in whatever unit {@code distanceMeasure}
     *        returns (the original documentation states meters)
     */
    public WithinDistanceCluster(DistanceMeasure distanceMeasure, double theta) {
        super(distanceMeasure);
        this.theta = theta;
    }

    /**
     * Groups the given points into clusters in a single pass.
     *
     * @param points points to cluster, visited in the collection's iteration order
     * @return the clusters in creation order; every element is a {@link CentroidCluster}
     *         whose center is the first point assigned to it
     * @throws MathIllegalArgumentException declared by the overridden method
     * @throws ConvergenceException declared by the overridden method
     */
    @Override
    public List<? extends Cluster<T>> cluster(Collection<T> points)
            throws MathIllegalArgumentException, ConvergenceException {
        List<CentroidCluster<T>> clusters = new ArrayList<>();

        for (T point : points) {
            // First match wins: clusters are probed in creation order.
            CentroidCluster<T> home = null;
            for (CentroidCluster<T> candidate : clusters) {
                if (distance(point, candidate.getCenter()) < theta) {
                    home = candidate;
                    break;
                }
            }

            if (home == null) {
                // No center within range: this point seeds a new cluster and is its center.
                home = new CentroidCluster<>(point);
                clusters.add(home);
            }

            // The center is not automatically a member, so the seed point is added too.
            home.addPoint(point);
        }

        return clusters;
    }

}
    原文作者:聚类算法
    原文地址: https://blog.csdn.net/weixin_39897965/article/details/77387110
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞