说明
本文用 numpy 从零实现 fuzzy c-means(FCM,模糊 C 均值)聚类算法,以便理解其计算过程。
注意,FCM 实现过程中隶属度(degree of membership)矩阵的初始化需要满足三个条件[1]。我在第一次初始化时直接把每个点属于每个类的隶属度都设为相同的值,结果得到了错误的结果:隶属度全部相同时,各聚类中心会重合在同一点,之后每次迭代算出的隶属度仍然全部相同,算法无法把各类分开。
后来参考了skfuzzy的初始化方式,即随机初始化,得到正确的结果。详情直接看源码。
代码
from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
class Data(object):
    """Synthetic 2-D test data: three Gaussian blobs with known labels."""

    def __init__(self):
        # Colour cycle used when plotting; index k is the colour of cluster k.
        self.colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']

    def generate(self):
        """Draw 200 points for each of three Gaussian clusters.

        The global NumPy RNG is re-seeded with 42 so that every call
        produces the same data.

        Returns:
            Tuple ``(xpts, ypts, labels)`` of 1-D arrays of length 600.
            ``labels`` holds the generating cluster index (0.0, 1.0 or 2.0).
        """
        # Define three cluster centers
        centers = [[4, 2],
                   [1, 7],
                   [5, 6]]
        # Define three cluster sigmas in x and y, respectively
        sigmas = [[0.8, 0.3],
                  [0.3, 0.5],
                  [1.1, 0.7]]
        n_points = 200

        np.random.seed(42)  # Set seed for reproducibility

        # Collect per-cluster chunks and stack once at the end.  Starting
        # from np.zeros(1) (as the code used to) would prepend a bogus
        # sample at (0, 0) labelled as cluster 0.
        x_parts, y_parts, label_parts = [], [], []
        for i, ((xmu, ymu), (xsigma, ysigma)) in enumerate(zip(centers, sigmas)):
            # Draw x before y for each cluster to keep the RNG stream order.
            x_parts.append(np.random.standard_normal(n_points) * xsigma + xmu)
            y_parts.append(np.random.standard_normal(n_points) * ysigma + ymu)
            label_parts.append(np.ones(n_points) * i)

        self.xpts = np.hstack(x_parts)
        self.ypts = np.hstack(y_parts)
        self.labels = np.hstack(label_parts)
        return self.xpts, self.ypts, self.labels

    def visualize(self):
        """Scatter-plot the generated points, one colour per true label.

        ``generate()`` must have been called first.  The figure is created
        but not shown; the caller is expected to call ``plt.show()``.
        """
        fig0, ax0 = plt.subplots()
        # Iterate over the labels actually present instead of a literal 3.
        for label in np.unique(self.labels):
            mask = self.labels == label
            ax0.plot(self.xpts[mask], self.ypts[mask], '.',
                     color=self.colors[int(label)])
        ax0.set_title('Test data: 200 points x3 clusters.')
class Fuzzy(object):
    """Minimal fuzzy c-means (FCM) clustering for 2-D points.

    Distances between samples and centers use the L1 (Manhattan) norm.
    """

    def __init__(self, xpts, ypts, labels):
        """Store the sample coordinates and their reference labels.

        Args:
            xpts: 1-D array of x coordinates.
            ypts: 1-D array of y coordinates, same length as ``xpts``.
            labels: reference labels; stored but not used by ``cluster``.
        """
        self.xpts = xpts
        self.ypts = ypts
        self.labels = labels

    def _norm1(self, array):
        """Return the sum of absolute values of all entries of ``array``."""
        return np.sum(np.abs(array))

    def _normalize_rows(self, rows):
        """Scale every row of ``rows`` so that it sums to 1."""
        return rows / np.sum(rows, axis=1, keepdims=True)

    def cluster(self, classes, m=2, niter=1000, error=1e-5, verbose=True):
        """Run fuzzy c-means and return the centers and hard assignments.

        Args:
            classes: number of clusters.
            m: fuzzifier; must be greater than 1.
            niter: maximum number of iterations.
            error: stop once the summed absolute change of the membership
                matrix between two iterations falls below this value.
            verbose: print the iteration counter at every step.

        Returns:
            Tuple ``(c, predict)``: ``c`` is a ``(classes, 2)`` float32
            array of cluster centers, ``predict`` holds for every sample
            the index of the cluster with the highest membership.

        Raises:
            ValueError: if ``m <= 1`` or ``classes < 1``.
        """
        if m <= 1:
            # The membership update uses the exponent 2 / (m - 1).
            raise ValueError('m must be greater than 1, got {!r}'.format(m))
        if classes < 1:
            raise ValueError('classes must be at least 1, got {!r}'.format(classes))

        # column_stack works on both Python 2 and 3; np.array(zip(...))
        # only produced an (N, 2) array on Python 2.
        self.x = np.column_stack((self.xpts, self.ypts))
        n_data = self.x.shape[0]

        # Memberships must start out random: if every entry is equal, all
        # centers coincide and the updates never leave that state.
        u = self._normalize_rows(np.random.rand(n_data, classes))

        # float32 is kept so the dtype of the returned centers is unchanged.
        c = np.zeros([classes, self.x.shape[1]], dtype=np.float32)
        exponent = 2.0 / (m - 1)
        tiny = np.finfo(np.float64).eps

        for n in range(niter):
            # Centers: c_j = sum_i u_ij^m * x_i / sum_i u_ij^m
            u_m = u ** m
            c[:] = np.dot(u_m.T, self.x) / np.sum(u_m, axis=0)[:, np.newaxis]

            # L1 distance of every sample to every center, shape (N, C).
            dist = np.sum(np.abs(self.x[:, np.newaxis, :] - c[np.newaxis, :, :]), axis=2)
            # A sample sitting exactly on a center would give 0/0 or x/0.
            dist = np.fmax(dist, tiny)

            # u_ij = 1 / sum_k (d_ij / d_ik) ** (2 / (m - 1))
            ratio = dist[:, :, np.newaxis] / dist[:, np.newaxis, :]
            u_new = 1.0 / np.sum(ratio ** exponent, axis=2)

            if verbose:
                print('FCM steps:', n)

            # Adopt the new memberships before the convergence test so the
            # final prediction is based on the most recent update.
            delta = self._norm1(u - u_new)
            u = u_new
            if delta < error:
                break

        predict = np.argmax(u, axis=1)
        return c, predict
if __name__ == '__main__':
    # Demo: generate three Gaussian blobs, cluster them with fuzzy c-means
    # and plot both the ground truth and the clustering result.
    n_classes = 3

    # generate data
    data = Data()
    xpts, ypts, labels = data.generate()
    data.visualize()

    # fuzzy c means
    fuzzy = Fuzzy(xpts, ypts, labels)
    center, predict_labels = fuzzy.cluster(classes=n_classes, m=2, niter=1000)

    # visualize: reuse the palette of the ground-truth plot
    colors = data.colors
    fig, ax = plt.subplots()
    # range (not xrange) so the script also runs on Python 3.
    for i in range(n_classes):
        mask = predict_labels == i
        ax.plot(xpts[mask], ypts[mask], '.', color=colors[i])
    # Mark every estimated cluster center with a red square.
    for pt in center:
        ax.plot(pt[0], pt[1], 'rs')
    ax.set_title('clustering results')
    plt.show()
结果:

参考
- A Tutorial on Clustering Algorithms
- skfuzzy demo
- skfuzzy
- J. C. Dunn (1973): "A Fuzzy Relative of the ISODATA Process and Its Use in Detecting Compact Well-Separated Clusters", Journal of Cybernetics 3: 32-57
网友评论