聚类分析
层次聚类
#01Jun2018
library(tidyverse)
library(DataExplorer)
library(NbClust)
# Hierarchical (average-linkage) clustering of the `nutrient` data ----
# The data set is loaded from the flexclust package.
data(nutrient, package = "flexclust")

# Inspect missingness and the raw variable scales before clustering.
profile_missing(nutrient)
boxplot(nutrient)

# Standardise every column so that variables measured on large scales do
# not dominate the Euclidean distances.
nutrient.scaled <- scale(nutrient)
boxplot(nutrient.scaled)

# Heatmap of the pairwise distance matrix.
# NOTE(review): labCol/labRow = FALSE is presumably meant to hide the axis
# labels; the values are kept exactly as in the original call.
d <- dist(nutrient.scaled)
heatmap(as.matrix(d), labCol = FALSE, labRow = FALSE)

# Shuffle the rows reproducibly, then recompute the distances on the
# reordered matrix. seq_len() is used instead of 1:n so that an empty
# matrix yields an empty permutation rather than c(1, 0).
set.seed(1234)
shuffled_rows <- sample(seq_len(nrow(nutrient.scaled)))
nutrient.scaled <- nutrient.scaled[shuffled_rows, ]
d <- dist(nutrient.scaled)

# Average-linkage agglomerative clustering and its dendrogram.
fit.average <- hclust(d, method = "average")
plot(
  fit.average,
  hang = -1.5,
  cex = 0.8,
  main = "Average Linkage Clustering"
)

# Evaluate candidate numbers of clusters (2 to 15) with NbClust.
nc <- NbClust(
  nutrient.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "average"
)

# Cut the tree into two clusters and tabulate the cluster sizes.
clusters <- cutree(fit.average, k = 2)
table(clusters)

# Return to a single-panel layout before redrawing the dendrogram
# (presumably because NbClust's diagnostic plots change the layout).
par(mfrow = c(1, 1))
plot(
  fit.average,
  hang = -1,
  cex = 0.8,
  main = "Average Linkage Clustering \n 2 Cluster Solution"
)
# Outline the two-cluster solution on the dendrogram.
rect.hclust(fit.average, k = 2)
kmeans
##kmean
library(NbClust)
# K-means clustering of the standardised `nutrient` data ----
data(nutrient, package = "flexclust")
nutrient.scaled <- scale(nutrient)

# Evaluate candidate numbers of clusters (2 to 15) using k-means
# partitions inside NbClust.
set.seed(1234)
nc <- NbClust(
  nutrient.scaled,
  min.nc = 2,
  max.nc = 15,
  method = "kmeans"
)

# Fit the 3-cluster solution. nstart = 25 tries several random
# initialisations and keeps the best one, because a single random start
# can settle in a poor local optimum.
set.seed(1234)
fit.km <- kmeans(nutrient.scaled, centers = 3, nstart = 25)

# summary() has no kmeans method and only lists component lengths, so
# print the fitted object to show the sizes, centres and sums of squares.
print(fit.km)
##kmeans
library(ggplot2)
library(cluster)
library(factoextra)
# K-means clustering of the USArrests data with factoextra helpers ----
data("USArrests")
USArrests <- na.omit(USArrests)
head(USArrests, n = 6)

# Standardise the columns before any distance-based analysis.
df <- scale(USArrests)

# Assess clustering tendency using n = 40 sampled points; show the
# returned plot and the Hopkins statistic.
res <- get_clust_tendency(df, n = 40, graph = TRUE)
res$plot
res$hopkins_stat

# Gap statistic for k = 1..10, with 500 bootstrap reference sets and
# 25 random starts for each k-means fit.
set.seed(123)
gap_stat <- clusGap(df, FUN = kmeans, nstart = 25, K.max = 10, B = 500)
fviz_gap_stat(gap_stat)

# Fit and visualise the 4-cluster solution.
km.res <- kmeans(df, centers = 4, nstart = 25)
# NOTE(review): the raw USArrests table is passed as `data`, as in the
# original; this relies on fviz_cluster() standardising its input by
# default so that it matches the scaled `df` used for fitting -- confirm.
fviz_cluster(km.res, data = USArrests)
使用eclust进行kmeans聚类或层次聚类
library(cluster)
library(factoextra)
# Enhanced clustering of USArrests with factoextra::eclust ----
data("USArrests")
USArrests <- na.omit(USArrests)
df <- scale(USArrests)

# K-means via eclust; no k is supplied, so the number of clusters is
# left to eclust. The gap-statistic and silhouette plots are then drawn
# from the returned object.
set.seed(1234)
res.km <- eclust(df, FUNcluster = "kmeans")
fviz_gap_stat(res.km$gap_stat)
fviz_silhouette(res.km)

# Hierarchical clustering via eclust, followed by the dendrogram (with
# cluster rectangles), the silhouette plot and the cluster scatter plot.
res.hc <- eclust(df, FUNcluster = "hclust")
fviz_dend(res.hc, rect = TRUE)
fviz_silhouette(res.hc)
fviz_cluster(res.hc)
网友评论