library(FactoMineR)
library(factoextra)
# Standardize every column of USArrests (center to mean 0, scale to sd 1) so
# that no single variable dominates the distance computations used below.
df <- scale(USArrests)

# fviz_nbclust() calls kmeans() once per candidate k, each time with randomly
# chosen starting centers. Seed the RNG first so the elbow plot is
# reproducible from run to run.
set.seed(123)

# Elbow method: plot the total within-cluster sum of squares ("wss") against
# the number of clusters. The dashed vertical line marks k = 4, where the
# curve flattens out, suggesting four clusters is a reasonable choice.
fviz_nbclust(df, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2)
# Fix the RNG seed so the random starting centers are reproducible.
set.seed(123)

# Fit k-means with 4 centers and 24 random starts. The outer parentheses make
# the assignment also print a summary of the fitted object.
(km_result <- kmeans(x = df, centers = 4, nstart = 24))

# Cluster assignment of each observation.
km_result[["cluster"]]

# Number of observations in each cluster.
table(km_result[["cluster"]])

# Visualize the clusters.
# Alternative ellipse types (swap in for the active `ellipse.type` below):
#   ellipse.type = "convex"      # convex hull (polygon)
#   ellipse.type = "confidence"  # confidence ellipse
#   ellipse.type = "t"           # ellipse, multivariate t-distribution
#   ellipse.type = "norm"        # ellipse, multivariate normal distribution
fviz_cluster(
  object = km_result,
  data = df,
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  ellipse.type = "euclid", # circle based on Euclidean distance
  star.plot = TRUE,        # draw segments connecting points within a cluster
  repel = TRUE,            # keep text labels from overlapping
  ggtheme = theme_classic()
)
# Pairwise Euclidean distances between the observations (rows of df).
result <- dist(x = df, method = "euclidean")

# Build the hierarchical clustering tree using the "ward.D2" method.
result_hc <- hclust(d = result, method = "ward.D2")

# First look at the dendrogram; it suggests that four clusters is reasonable.
fviz_dend(x = result_hc, cex = 0.6)

# Cut the tree into 4 clusters. The outer parentheses make the assignment
# also print each observation's cluster membership.
(x <- cutree(tree = result_hc, k = 4))

# Number of observations in each cluster.
table(x)

# Final dendrogram with the 4 clusters highlighted.
fviz_dend(
  x = result_hc,
  k = 4,
  cex = 0.6,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  color_labels_by_k = TRUE, # color the labels by cluster membership
  rect = TRUE,              # draw a rectangle around each cluster
  ggtheme = theme_classic()
)
# [Web page residue: "Reader comments" section heading; not part of the script]