参考文章
K值选择:参考链接1, 参考链接2, 参考链接3
# Choose the best k (method 1): Calinski-Harabasz criterion via cascadeKM().
library(vegan) # provides cascadeKM() and the plot method for its result
# NOTE(review): `data` is not defined in this snippet (the later methods use
# `df2`) -- confirm it holds the numeric table that should be clustered.
k_min <- 3
k_max <- 10
fit <- cascadeKM(data, k_min, k_max, iter = 10, criterion = "calinski")
# Row 2 of fit$results holds the criterion value of each partition. Its columns
# run from k_min to k_max groups, so which.max() yields a column position, not
# a cluster count: shift it by k_min - 1 to obtain the real k.
calinski.best <- as.numeric(which.max(fit$results[2, ])) + k_min - 1
print(paste("the best k is: ", calinski.best))
plot(fit, sortg = TRUE, grpmts.plot = TRUE)
# Close the graphics device the plot was drawn on.
dev.off()
# Choose the best k (method 2): model-based clustering compared by BIC.
library(mclust)
# Standardize the columns of df2 before fitting.
dataset <- scale(df2)
# Try every cluster count from 1 through 20.
m_clust <- Mclust(data = as.matrix(dataset), G = seq_len(20))
summary(m_clust)
# Show the BIC curves of the fitted models.
plot(m_clust, what = "BIC")
# Choose the best k (method 3): majority vote across the NbClust indices.
library(NbClust)
# The method is k-means, so without a fixed seed each run may give a
# different result.
set.seed(1234)
nb_clust <- NbClust(
  data = dataset,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "kmeans",
  index = "alllong",
  alphaBeale = 0.1
)
# Tally the first row of Best.nc (the cluster count proposed by each index)
# and draw the vote counts as bars.
barplot(
  table(nb_clust$Best.nc[1, ]),
  xlab = "聚类数",
  ylab = "支持指标数"
)
网友评论