美文网首页
R 实现聚类分析_07Jul2020

R 实现聚类分析_07Jul2020

作者: liang_rujiang | 来源:发表于2020-07-07 21:08 被阅读0次

    聚类分析

    层次聚类

    # 01Jun2018
    # Average-linkage hierarchical clustering of the `nutrient` data set
    # (shipped with the flexclust package).
    
    library(tidyverse)
    library(DataExplorer)
    library(NbClust)
    
    # Load the data and take a first look: missing-value profile and raw ranges.
    data(nutrient, package = "flexclust")
    profile_missing(nutrient)
    boxplot(nutrient)
    
    # scale() centres and scales every column; distances are computed on the
    # standardized values (dist() defaults to Euclidean distance).
    nutrient.scaled <- scale(nutrient)
    boxplot(nutrient.scaled)
    d <- dist(nutrient.scaled)
    # Spelled-out FALSE instead of F: F is an ordinary variable that can be
    # reassigned, FALSE is a reserved word.
    heatmap(as.matrix(d), labCol = FALSE, labRow = FALSE)
    
    # Shuffle the rows reproducibly and recompute the distance matrix.
    # The permutation is kept in `row_perm` so that base::order() is not masked,
    # and seq_len() is used instead of 1:nrow() (safe for zero-row input).
    set.seed(1234)
    row_perm <- sample(seq_len(nrow(nutrient.scaled)))
    nutrient.scaled <- nutrient.scaled[row_perm, ]
    d <- dist(nutrient.scaled)
    
    # Build the dendrogram; a negative `hang` aligns all leaf labels at the
    # bottom of the plot.
    fit.average <- hclust(d, method = "average")
    plot(fit.average, hang = -1.5, cex = .8,  main = "Average Linkage Clustering")
    
    # NbClust evaluates candidate cluster counts between min.nc and max.nc; its
    # result is stored in `nc` for inspection. The tree is then cut at k = 2.
    nc <- NbClust(nutrient.scaled, distance = "euclidean",
                  min.nc = 2, max.nc = 15, method = "average")
    clusters <- cutree(fit.average, k = 2)
    table(clusters)
    # Back to a single-panel layout before redrawing the dendrogram
    # (presumably needed because NbClust's own plots change the layout).
    par(mfrow = c(1, 1))
    plot(fit.average, hang = -1, cex = .8,
         main = "Average Linkage Clustering \n 2 Cluster Solution")
    # Outline the two clusters on the dendrogram.
    rect.hclust(fit.average, k = 2)
    
    

    kmeans

    ## k-means clustering of the standardized `nutrient` data
    library(NbClust)
    data(nutrient, package = "flexclust")
    nutrient.scaled <- scale(nutrient)
    
    # Evaluate candidate cluster counts (2 to 15) for k-means with NbClust.
    set.seed(1234)
    nc <- NbClust(nutrient.scaled, min.nc = 2, max.nc = 15, method = "kmeans")
    
    # Fit a 3-cluster solution. nstart = 25 tries 25 random initialisations and
    # keeps the best one, so the result does not hinge on a single random start.
    set.seed(1234)
    fit.km <- kmeans(nutrient.scaled, centers = 3, nstart = 25)
    # print() shows cluster sizes, centres, assignments and sums of squares;
    # summary() on a kmeans object only lists the length/class/mode of its
    # components, which says nothing about the clustering itself.
    print(fit.km)
    
    
    ## k-means clustering of USArrests with factoextra helpers
    library(ggplot2)
    library(cluster)
    library(factoextra)
    data("USArrests")
    USArrests <- na.omit(USArrests)
    head(USArrests, n = 6)
    
    # Standardize the variables before clustering.
    df <- scale(USArrests)
    # Assess clustering tendency (Hopkins statistic plus an ordered
    # dissimilarity plot), sampling n = 40 points.
    res <- get_clust_tendency(df, n = 40, graph = TRUE)
    res$plot
    res$hopkins_stat
    # Gap statistic for k = 1..10 with 500 bootstrap reference sets; kmeans is
    # run with 25 random starts for each k.
    set.seed(123)
    gap_stat <- clusGap(df, FUN = kmeans, nstart = 25, K.max = 10, B = 500)
    fviz_gap_stat(gap_stat)
    # Final 4-cluster fit.
    km.res <- kmeans(df, centers = 4, nstart = 25)
    # Plot using the same scaled matrix the model was fitted on, rather than
    # relying on fviz_cluster() to re-standardize the raw data.
    fviz_cluster(km.res, data = df)
    
    

    使用eclust进行kmeans聚类或层次聚类

    # eclust() wraps both k-means and hierarchical clustering behind one call.
    library(cluster)
    library(factoextra)
    data("USArrests")
    USArrests <- na.omit(USArrests)
    df <- scale(USArrests)
    
    # k-means: no k is supplied here, so eclust() picks the number of clusters
    # itself; the gap statistic it computed is stored in res.km$gap_stat.
    set.seed(1234)
    res.km <- eclust(df, "kmeans")
    fviz_gap_stat(res.km$gap_stat)
    fviz_silhouette(res.km)
    
    # Hierarchical clustering through the same interface, followed by the
    # dendrogram (with cluster rectangles), silhouette and cluster plots.
    res.hc <- eclust(df, "hclust")
    # TRUE spelled out: T is a reassignable variable, not a reserved word.
    fviz_dend(res.hc, rect = TRUE)
    fviz_silhouette(res.hc)
    fviz_cluster(res.hc)
    
    

    相关文章

      网友评论

          本文标题:R 实现聚类分析_07Jul2020

          本文链接:https://www.haomeiwen.com/subject/giazqktx.html