from numpy import *
import numpy
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a NumPy array.

    Every non-blank line becomes one row; each tab-separated field on the
    line is converted with ``float``. Blank lines (for example a trailing
    newline at the end of the file) are skipped instead of raising.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``numpy.ndarray`` built from the parsed rows (one row per
        non-blank line of the file).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    dataMat = []
    # The context manager guarantees the handle is closed even when a
    # malformed field makes float() raise half-way through the file.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue  # nothing to parse on an empty line
            dataMat.append([float(field) for field in stripped.split('\t')])
    return numpy.array(dataMat)
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Both arguments are coerced with ``numpy.asarray``, so plain lists and
    tuples are accepted in addition to NumPy arrays and matrices. The
    squared differences are summed over every element, so a 1 x n row and
    a length-n vector can be mixed freely.

    Args:
        vecA: First vector (array-like).
        vecB: Second vector (array-like), broadcast-compatible with vecA.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    diff = numpy.asarray(vecA) - numpy.asarray(vecB)
    # Explicit numpy.sum: the builtin sum would only reduce the first axis.
    return numpy.sqrt(numpy.sum(numpy.power(diff, 2)))
def randCent(dataSet, k):
    """Create k random centroids inside the bounding box of the data.

    For every column of ``dataSet`` the minimum and maximum are found and
    k values are drawn uniformly from that interval with
    ``numpy.random.rand``. Columns are processed in order, one ``rand(k, 1)``
    draw per column.

    Args:
        dataSet: 2-D array-like whose rows are points and whose columns
            are dimensions.
        k: Number of centroids to generate.

    Returns:
        A ``numpy.matrix`` of shape (k, number of columns); row i is the
        i-th centroid.
    """
    data = numpy.asarray(dataSet)
    n = data.shape[1]
    # numpy.asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    centroids = numpy.asmatrix(numpy.zeros((k, n)))
    for j in range(n):
        column = data[:, j]
        # Array reductions avoid depending on whether `min`/`max` resolve
        # to the builtins or to NumPy's functions under the star import.
        minJ = float(column.min())
        rangeJ = float(column.max()) - minJ
        centroids[:, j] = minJ + rangeJ * numpy.random.rand(k, 1)
    return centroids
def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Cluster the rows of ``dataSet`` into k groups with k-means.

    Starting from the centroids returned by ``createCent``, every point is
    assigned to its nearest centroid (according to ``distMeas``) and every
    centroid is then moved to the mean of its points. This repeats until a
    full pass changes no assignment. The final centroids and assignment
    table are printed before being returned.

    Args:
        dataSet: 2-D NumPy array; rows are points.
        k: Number of clusters.
        distMeas: Callable ``(centroid_row, point) -> distance``.
        createCent: Callable ``(dataSet, k) -> centroids`` giving the
            initial k centroids, indexable as ``centroids[j, :]``.

    Returns:
        Tuple ``(centroids, clusterAssment)``. ``clusterAssment`` is an
        (m, 2) ``numpy.matrix``: column 0 holds the index of the centroid
        each point is assigned to, column 1 the squared distance to it.
    """
    m = numpy.shape(dataSet)[0]
    # numpy.asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    clusterAssment = numpy.asmatrix(numpy.zeros((m, 2)))
    # Start from an impossible cluster index. With the zero default, points
    # whose first nearest centroid is 0 would not count as "changed", and
    # the loop could stop with assignments and distances that were computed
    # against the initial centroids rather than the final ones.
    clusterAssment[:, 0] = -1
    centroids = createCent(dataSet, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        for i in range(m):  # assign each point to its closest centroid
            minDist = numpy.inf
            minIndex = -1
            for j in range(k):  # distance from point i to each centroid
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # keep the nearest centroid and the squared distance to it
            clusterAssment[i, :] = minIndex, minDist ** 2
        for cent in range(k):  # recalculate centroids
            members = numpy.nonzero(clusterAssment[:, 0].A == cent)[0]
            if members.size == 0:
                # The mean of zero rows is NaN; keep the previous position
                # so an empty cluster does not poison the centroid.
                continue
            centroids[cent, :] = numpy.mean(dataSet[members], axis=0)
    print(centroids)
    print(clusterAssment)
    return centroids, clusterAssment
# Demo run: read the sample points from testSet.txt and split them into
# four clusters. kMeans prints the centroids and the assignment table.
dataMat = loadDataSet(fileName='testSet.txt')
kMeans(dataSet=dataMat, k=4)
[[-3.38237045 -2.9473363 ]
[ 2.6265299 3.10868015]
[ 2.80293085 -2.7315146 ]
[-2.46154315 2.78737555]]
[[1.00000000e+00 2.32019150e+00]
[3.00000000e+00 1.39004893e+00]
[2.00000000e+00 6.63839104e+00]
[0.00000000e+00 4.16140951e+00]
[1.00000000e+00 2.76967820e+00]
[3.00000000e+00 2.80101213e+00]
[2.00000000e+00 5.85909807e+00]
[0.00000000e+00 1.50646425e+00]
[1.00000000e+00 2.29348924e+00]
[3.00000000e+00 6.45967483e-01]
[2.00000000e+00 1.74010499e+00]
[0.00000000e+00 3.77769471e-01]
[1.00000000e+00 2.51695402e+00]
[3.00000000e+00 1.38716420e-01]
[2.00000000e+00 9.47633071e+00]
[0.00000000e+00 9.97310599e+00]
[1.00000000e+00 2.39726914e+00]
[3.00000000e+00 3.10242360e+00]
[2.00000000e+00 4.11084375e-01]
[0.00000000e+00 4.74890795e-01]
[1.00000000e+00 1.38706133e-01]
[3.00000000e+00 5.10240996e-01]
[2.00000000e+00 1.05700176e+00]
[0.00000000e+00 2.90181828e-02]
[1.00000000e+00 1.31601105e+00]
[3.00000000e+00 9.08203769e-01]
[2.00000000e+00 5.02608557e-01]
[0.00000000e+00 4.57942717e-01]
[1.00000000e+00 2.13786618e-01]
[3.00000000e+00 4.05632356e+00]
[2.00000000e+00 5.14171888e+00]
[0.00000000e+00 5.56237495e-01]
[1.00000000e+00 4.76142736e-01]
[3.00000000e+00 1.54414110e+00]
[2.00000000e+00 6.10930460e+00]
[0.00000000e+00 9.47660177e-01]
[1.00000000e+00 4.87745774e+00]
[3.00000000e+00 3.12703929e+00]
[2.00000000e+00 6.45118831e-03]
[0.00000000e+00 3.01415411e-01]
[1.00000000e+00 8.84955695e-01]
[3.00000000e+00 7.98870968e-02]
[2.00000000e+00 5.23673430e-01]
[0.00000000e+00 3.24171404e+00]
[1.00000000e+00 9.32523506e-02]
[3.00000000e+00 9.13705455e-01]
[2.00000000e+00 1.25766593e+00]
[0.00000000e+00 4.09563895e-01]
[1.00000000e+00 9.46987842e-01]
[3.00000000e+00 2.63836399e+00]
[2.00000000e+00 5.20371222e-01]
[0.00000000e+00 1.86796790e+00]
[1.00000000e+00 5.46768776e+00]
[3.00000000e+00 5.73153563e+00]
[2.00000000e+00 3.12040332e-01]
[0.00000000e+00 3.93986735e-01]
[1.00000000e+00 1.32864695e+00]
[3.00000000e+00 2.38032454e-02]
[2.00000000e+00 1.07872914e+00]
[0.00000000e+00 4.35369355e-01]
[1.00000000e+00 4.55502856e-01]
[3.00000000e+00 1.96212809e-02]
[2.00000000e+00 1.95213538e+00]
[0.00000000e+00 1.54154401e+00]
[1.00000000e+00 1.26364010e+00]
[3.00000000e+00 1.33108375e+00]
[2.00000000e+00 3.02422139e-01]
[0.00000000e+00 5.58860689e-01]
[1.00000000e+00 9.52516316e-02]
[3.00000000e+00 6.25129762e-01]
[2.00000000e+00 8.41875177e-01]
[0.00000000e+00 2.06159470e+00]
[1.00000000e+00 6.39227291e+00]
[3.00000000e+00 2.01200372e-01]
[2.00000000e+00 3.51030769e+00]
[0.00000000e+00 9.83287604e-01]
[1.00000000e+00 7.06014703e-02]
[3.00000000e+00 2.59901305e-01]
[2.00000000e+00 3.74491207e+00]
[0.00000000e+00 2.32143993e+00]]
测试数据:
1.658985 4.285136 -3.453687 3.424321 4.838138 -1.151539 -5.379713 -3.362104 0.972564 2.924086 -3.567919 1.531611 0.450614 -3.302219 -3.487105 -1.724432 2.668759 1.594842 -3.156485 3.191137 3.165506 -3.999838 -2.786837 -3.099354 4.208187 2.984927 -2.123337 2.943366 0.704199 -0.479481 -0.392370 -3.963704 2.831667 1.574018 -0.790153 3.343144 2.943496 -3.357075 -3.195883 -2.283926 2.336445 2.875106 -1.786345 2.554248 2.190101 -1.906020 -3.403367 -2.778288 1.778124 3.880832 -1.688346 2.230267 2.592976 -2.054368 -4.007257 -3.207066 2.257734 3.387564 -2.679011 0.785119 0.939512 -4.023563 -3.674424 -2.261084 2.046259 2.735279 -3.189470 1.780269 4.372646 -0.822248 -2.579316 -3.497576 1.889034 5.190400 -0.798747 2.185588 2.836520 -2.658556 -3.837877 -3.253815 2.096701 3.886007 -2.709034 2.923887 3.367037 -3.184789 -2.121479 -4.232586 2.329546 3.179764 -3.284816 3.273099 3.091414 -3.815232 -3.762093 -2.432191 3.542056 2.778832 -1.736822 4.241041 2.127073 -2.983680 -4.323818 -3.938116 3.792121 5.135768 -4.786473 3.358547 2.624081 -3.260715 -4.009299 -2.978115 2.493525 1.963710 -2.513661 2.642162 1.864375 -3.176309 -3.171184 -3.572452 2.894220 2.489128 -2.562539 2.884438 3.491078 -3.947487 -2.565729 -2.012114 3.332948 3.983102 -1.616805 3.573188 2.280615 -2.559444 -2.651229 -3.103198 2.321395 3.154987 -1.685703 2.939697 3.031012 -3.620252 -4.599622 -2.185829 4.196223 1.126677 -2.133863 3.093686 4.668892 -2.562705 -2.793241 -2.149706 2.884105 3.043438 -2.967647 2.848696 4.479332 -1.764772 -4.905566 -2.911070