# K-means 分群（二）

``````import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# NOTE(review): `digits` is assumed to be defined in an earlier block of
# this tutorial (presumably sklearn.datasets.load_digits()) -- confirm.
pca = PCA(3)  # project the 64 feature dimensions down to 3
projected = pca.fit_transform(digits.data)

# Cluster the raw 64-dimensional data with K-means
kmeans = KMeans(n_clusters=10)
kmeans.fit(digits.data)
target = kmeans.predict(digits.data)

ax = plt.axes(projection='3d')
ax.set_xlabel('component 1')
ax.set_ylabel('component 2')
ax.set_zlabel('component 3')
ax.set_box_aspect((1, 1, 1))

p = ax.scatter(
    projected[:, 0], projected[:, 1], projected[:, 2],
    c=target,           # color points by their K-means cluster label
    edgecolor='none',   # no marker edges
    alpha=0.5,          # translucency
    # plt.cm.get_cmap(name, lut) was removed in matplotlib 3.9; the
    # colormaps registry + resampled(10) is the supported replacement
    # for a 10-level discrete colormap.
    cmap=matplotlib.colormaps['nipy_spectral'].resampled(10)
)

plt.gcf().colorbar(p)  # color bar legend for the cluster labels

plt.show()
``````

``````import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans

# Total within-cluster sum of squared distances.
#
# data    : (n_samples, n_features) sample matrix
# centers : (k, n_features) cluster centers
# target  : (n_samples,) cluster label of each sample
def sum_squared_dist(data, centers, target):
    k = centers.shape[0]
    # For each cluster i, sum the squared distances from its members to
    # their center, then total over all clusters.  (The original routed
    # this through np.frompyfunc; a plain generator sum is equivalent.)
    return sum(
        np.sum(cdist(data[np.where(target == i)], [centers[i]]) ** 2)
        for i in range(k)
    )

# For each candidate k, cluster into k groups and return the total
# within-cluster sum of squared distances (computed by the sibling
# sum_squared_dist helper defined above).
def k_sum_squared_dist(k, data):
    # scalar version: cluster into k groups and score the result
    def _k_sum_squared_dist(k):
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(data)
        centers = kmeans.cluster_centers_
        target = kmeans.predict(data)
        return sum_squared_dist(data, centers, target)
    # vectorize so `k` may be a scalar or an array of candidate values
    _k_sum_squared_dist = np.frompyfunc(_k_sum_squared_dist, 1, 1)

    return _k_sum_squared_dist(k)

# Elbow plot: total within-cluster squared distance for each candidate k.
ks = np.arange(2, 25)
scores = k_sum_squared_dist(ks, digits.data)

plt.plot(ks, scores)

plt.show()
``````

``````import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

# For each candidate k, cluster into k groups and return the total
# within-cluster sum of squared distances.  KMeans already exposes this
# quantity directly as `inertia_`, so no manual distance sum is needed.
def k_sum_squared_dist(k, data):
    def _k_sum_squared_dist(k):
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(data)
        return kmeans.inertia_
    # vectorize so `k` may be a scalar or an array of candidate values
    _k_sum_squared_dist = np.frompyfunc(_k_sum_squared_dist, 1, 1)

    return _k_sum_squared_dist(k)

# Elbow plot over the candidate cluster counts, using KMeans inertia.
ks = np.arange(2, 25)
scores = k_sum_squared_dist(ks, digits.data)

plt.plot(ks, scores)

plt.show()
``````

``````import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# For each candidate k, cluster into k groups and return the total
# within-cluster sum of squared distances (KMeans `inertia_`).
def k_sum_squared_dist(k, data):
    def _k_sum_squared_dist(k):
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(data)
        return kmeans.inertia_
    # vectorize so `k` may be a scalar or an array of candidate values
    _k_sum_squared_dist = np.frompyfunc(_k_sum_squared_dist, 1, 1)

    return _k_sum_squared_dist(k)

pca = PCA(2)  # project the 64 feature dimensions down to 2
projected = pca.fit_transform(digits.data)

# Elbow plot on the PCA-reduced data.
ks = np.arange(2, 25)
scores = k_sum_squared_dist(ks, projected)

plt.plot(ks, scores)

plt.show()
``````

sklearn 的 `sklearn.metrics.silhouette_score` 可以用來計算輪廓係數，來看看降為 2 維時的樣子：

``````import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from sklearn.metrics import silhouette_score

# For each candidate k, cluster into k groups and return the silhouette
# coefficient of the resulting clustering.  NOTE: despite the name (kept
# for consistency with the earlier blocks), this version does NOT return
# a sum of squared distances -- it scores with silhouette_score.
def k_sum_squared_dist(k, data):
    def _k_sum_squared_dist(k):
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(data)
        # silhouette coefficient of this clustering
        return silhouette_score(data, kmeans.labels_)
    # vectorize so `k` may be a scalar or an array of candidate values
    _k_sum_squared_dist = np.frompyfunc(_k_sum_squared_dist, 1, 1)

    return _k_sum_squared_dist(k)

pca = PCA(2)  # project the 64 feature dimensions down to 2 (comment said 3 -- fixed)
projected = pca.fit_transform(digits.data)

# Silhouette coefficient for each candidate cluster count.
k = np.arange(2, 25)
msds = k_sum_squared_dist(k, projected)

plt.plot(k, msds)

plt.show()
``````

``````import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from scipy import spatial

k = 20

# Synthetic data: 500 samples drawn around k blob centers.
X, _ = make_blobs(n_samples = 500, centers = k, cluster_std = 0.8)

# Cluster the samples and recover the fitted centers and labels.
kmeans = KMeans(n_clusters = k)
kmeans.fit(X)
centers = kmeans.cluster_centers_
labels = kmeans.predict(X)

# Draw the Voronoi diagram induced by the cluster centers.
vor = spatial.Voronoi(centers)
spatial.voronoi_plot_2d(vor)

plt.xlabel('x')
plt.ylabel('y')

plt.scatter(
    X[:, 0], X[:, 1],
    c = labels,          # color by cluster label
    edgecolor = 'none',  # no marker edges
    alpha = 0.5          # translucency
)

plt.show()
``````

``````import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
import cv2

k = 5

# NOTE(review): `img` is assumed to be an image loaded in an earlier block
# of this tutorial -- confirm.
# Flatten the image into one row per pixel (3 color channels).
pixels = img.copy().reshape((img.shape[0] * img.shape[1], 3))

# Cluster the pixel colors into k groups.
kmeans = KMeans(n_clusters = k)
kmeans.fit(pixels)
centers = kmeans.cluster_centers_
labels = kmeans.predict(pixels)

# Rebuild the image, painting every pixel with its cluster's center color.
reduced = centers[labels].reshape(
    (img.shape[0], img.shape[1], 3)).astype('uint8')

cv2.imshow('caterpillar', img)
cv2.imshow('reduced', reduced)

cv2.waitKey(0)
cv2.destroyAllWindows()
``````