K均值(K-Means)聚类算法

from numpy import *
import numpy

def loadDataSet(fileName):
    """Load a tab-delimited text file of floats into a NumPy array.

    Each non-blank line becomes one row; every tab-separated field on the
    line is converted with ``float``.

    Args:
        fileName: Path of the text file to read.

    Returns:
        ``numpy.ndarray`` built from the parsed rows (one row per non-blank
        line of the file).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    dataMat = []
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            # Skip blank lines (e.g. trailing newlines) instead of crashing
            # on float('').
            if not stripped:
                continue
            dataMat.append([float(field) for field in stripped.split('\t')])
    return numpy.array(dataMat)

def distEclud(vecA, vecB):
    """Return the Euclidean distance between ``vecA`` and ``vecB``.

    Computed as the square root of the sum of squared element-wise
    differences.
    """
    difference = vecA - vecB
    squaredTotal = numpy.sum(numpy.power(difference, 2))
    return numpy.sqrt(squaredTotal)

def randCent(dataSet, k):
    """Create ``k`` random centroids inside the bounding box of ``dataSet``.

    For every column, values are drawn uniformly between that column's
    minimum and maximum.

    Args:
        dataSet: 2-D array-like of points, one point per row.
        k: Number of centroids to generate.

    Returns:
        ``numpy.matrix`` of shape ``(k, n)`` where ``n`` is the number of
        columns of ``dataSet``.
    """
    points = numpy.asarray(dataSet)
    n = points.shape[1]
    mins = points.min(axis=0)
    ranges = points.max(axis=0) - mins
    # rand(n, k).T consumes the random stream column by column (k draws for
    # column 0, then k for column 1, ...), so seeded runs give the same
    # centroids as filling each column with its own rand(k, 1) call.
    unit = numpy.random.rand(n, k).T
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    return numpy.asmatrix(mins + ranges * unit)


def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Repeats two steps until no point changes cluster: assign every point to
    its nearest centroid, then move every centroid to the mean of the points
    assigned to it. The final centroids and assignments are printed before
    being returned.

    Args:
        dataSet: 2-D NumPy array of points, one point per row.
        k: Number of clusters.
        distMeas: Callable ``(centroid_row, point_row) -> distance``.
        createCent: Callable ``(dataSet, k) -> initial centroids`` with one
            centroid per row.

    Returns:
        Tuple ``(centroids, clusterAssment)``. ``clusterAssment`` is an
        ``(m, 2)`` matrix whose first column is the assigned cluster index
        and whose second column is the squared distance to that centroid.
    """
    m = numpy.shape(dataSet)[0]
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    clusterAssment = numpy.asmatrix(numpy.zeros((m, 2)))
    # Start every point at "no cluster" (-1). Starting at 0 would let the
    # loop stop after a single pass when all points happen to be nearest to
    # centroid 0, because no assignment would appear to change.
    clusterAssment[:, 0] = -1
    centroids = createCent(dataSet, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # Assignment step: find the closest centroid for each point.
        for i in range(m):
            minDist = numpy.inf
            minIndex = -1
            for j in range(k):  # distance from point i to each of the k centroids
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Record the nearest centroid and the squared distance to it.
            clusterAssment[i, :] = minIndex, minDist ** 2
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            members = numpy.nonzero(clusterAssment[:, 0].A == cent)[0]
            if members.size == 0:
                # An empty cluster has no mean; keep the previous centroid
                # rather than overwriting it with NaN.
                continue
            centroids[cent, :] = numpy.mean(dataSet[members], axis=0)
    print(centroids)
    print(clusterAssment)
    return centroids, clusterAssment
# Demo run: load the sample points from 'testSet.txt' and cluster them into 4 groups.
dataMat = loadDataSet('testSet.txt')
# The returned (centroids, assignments) tuple is discarded; kMeans prints both.
kMeans(dataMat,4)

[[-3.38237045 -2.9473363 ]

 [ 2.6265299   3.10868015]

 [ 2.80293085 -2.7315146 ]

 [-2.46154315  2.78737555]]

[[1.00000000e+00 2.32019150e+00]

 [3.00000000e+00 1.39004893e+00]

 [2.00000000e+00 6.63839104e+00]

 [0.00000000e+00 4.16140951e+00]

 [1.00000000e+00 2.76967820e+00]

 [3.00000000e+00 2.80101213e+00]

 [2.00000000e+00 5.85909807e+00]

 [0.00000000e+00 1.50646425e+00]

 [1.00000000e+00 2.29348924e+00]

 [3.00000000e+00 6.45967483e-01]

 [2.00000000e+00 1.74010499e+00]

 [0.00000000e+00 3.77769471e-01]

 [1.00000000e+00 2.51695402e+00]

 [3.00000000e+00 1.38716420e-01]

 [2.00000000e+00 9.47633071e+00]

 [0.00000000e+00 9.97310599e+00]

 [1.00000000e+00 2.39726914e+00]

 [3.00000000e+00 3.10242360e+00]

 [2.00000000e+00 4.11084375e-01]

 [0.00000000e+00 4.74890795e-01]

 [1.00000000e+00 1.38706133e-01]

 [3.00000000e+00 5.10240996e-01]

 [2.00000000e+00 1.05700176e+00]

 [0.00000000e+00 2.90181828e-02]

 [1.00000000e+00 1.31601105e+00]

 [3.00000000e+00 9.08203769e-01]

 [2.00000000e+00 5.02608557e-01]

 [0.00000000e+00 4.57942717e-01]

 [1.00000000e+00 2.13786618e-01]

 [3.00000000e+00 4.05632356e+00]

 [2.00000000e+00 5.14171888e+00]

 [0.00000000e+00 5.56237495e-01]

 [1.00000000e+00 4.76142736e-01]

 [3.00000000e+00 1.54414110e+00]

 [2.00000000e+00 6.10930460e+00]

 [0.00000000e+00 9.47660177e-01]

 [1.00000000e+00 4.87745774e+00]

 [3.00000000e+00 3.12703929e+00]

 [2.00000000e+00 6.45118831e-03]

 [0.00000000e+00 3.01415411e-01]

 [1.00000000e+00 8.84955695e-01]

 [3.00000000e+00 7.98870968e-02]

 [2.00000000e+00 5.23673430e-01]

 [0.00000000e+00 3.24171404e+00]

 [1.00000000e+00 9.32523506e-02]

 [3.00000000e+00 9.13705455e-01]

 [2.00000000e+00 1.25766593e+00]

 [0.00000000e+00 4.09563895e-01]

 [1.00000000e+00 9.46987842e-01]

 [3.00000000e+00 2.63836399e+00]

 [2.00000000e+00 5.20371222e-01]

 [0.00000000e+00 1.86796790e+00]

 [1.00000000e+00 5.46768776e+00]

 [3.00000000e+00 5.73153563e+00]

 [2.00000000e+00 3.12040332e-01]

 [0.00000000e+00 3.93986735e-01]

 [1.00000000e+00 1.32864695e+00]

 [3.00000000e+00 2.38032454e-02]

 [2.00000000e+00 1.07872914e+00]

 [0.00000000e+00 4.35369355e-01]

 [1.00000000e+00 4.55502856e-01]

 [3.00000000e+00 1.96212809e-02]

 [2.00000000e+00 1.95213538e+00]

 [0.00000000e+00 1.54154401e+00]

 [1.00000000e+00 1.26364010e+00]

 [3.00000000e+00 1.33108375e+00]

 [2.00000000e+00 3.02422139e-01]

 [0.00000000e+00 5.58860689e-01]

 [1.00000000e+00 9.52516316e-02]

 [3.00000000e+00 6.25129762e-01]

 [2.00000000e+00 8.41875177e-01]

 [0.00000000e+00 2.06159470e+00]

 [1.00000000e+00 6.39227291e+00]

 [3.00000000e+00 2.01200372e-01]

 [2.00000000e+00 3.51030769e+00]

 [0.00000000e+00 9.83287604e-01]

 [1.00000000e+00 7.06014703e-02]

 [3.00000000e+00 2.59901305e-01]

 [2.00000000e+00 3.74491207e+00]

 [0.00000000e+00 2.32143993e+00]]

测试数据:

1.658985	4.285136
-3.453687	3.424321
4.838138	-1.151539
-5.379713	-3.362104
0.972564	2.924086
-3.567919	1.531611
0.450614	-3.302219
-3.487105	-1.724432
2.668759	1.594842
-3.156485	3.191137
3.165506	-3.999838
-2.786837	-3.099354
4.208187	2.984927
-2.123337	2.943366
0.704199	-0.479481
-0.392370	-3.963704
2.831667	1.574018
-0.790153	3.343144
2.943496	-3.357075
-3.195883	-2.283926
2.336445	2.875106
-1.786345	2.554248
2.190101	-1.906020
-3.403367	-2.778288
1.778124	3.880832
-1.688346	2.230267
2.592976	-2.054368
-4.007257	-3.207066
2.257734	3.387564
-2.679011	0.785119
0.939512	-4.023563
-3.674424	-2.261084
2.046259	2.735279
-3.189470	1.780269
4.372646	-0.822248
-2.579316	-3.497576
1.889034	5.190400
-0.798747	2.185588
2.836520	-2.658556
-3.837877	-3.253815
2.096701	3.886007
-2.709034	2.923887
3.367037	-3.184789
-2.121479	-4.232586
2.329546	3.179764
-3.284816	3.273099
3.091414	-3.815232
-3.762093	-2.432191
3.542056	2.778832
-1.736822	4.241041
2.127073	-2.983680
-4.323818	-3.938116
3.792121	5.135768
-4.786473	3.358547
2.624081	-3.260715
-4.009299	-2.978115
2.493525	1.963710
-2.513661	2.642162
1.864375	-3.176309
-3.171184	-3.572452
2.894220	2.489128
-2.562539	2.884438
3.491078	-3.947487
-2.565729	-2.012114
3.332948	3.983102
-1.616805	3.573188
2.280615	-2.559444
-2.651229	-3.103198
2.321395	3.154987
-1.685703	2.939697
3.031012	-3.620252
-4.599622	-2.185829
4.196223	1.126677
-2.133863	3.093686
4.668892	-2.562705
-2.793241	-2.149706
2.884105	3.043438
-2.967647	2.848696
4.479332	-1.764772
-4.905566	-2.911070

    原文作者:聚类算法
    原文地址: https://blog.csdn.net/u010261063/article/details/80145271
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞