美文网首页R语言学习
R语言可视化(十六):PCA图绘制

R语言可视化(十六):PCA图绘制

作者: Davey1220 | 来源:发表于2020-09-30 21:56 被阅读0次

    16. PCA图绘制


    清除当前环境中的变量

    rm(list=ls())
    

    设置工作目录

    setwd("C:/Users/Dell/Desktop/R_Plots/16pca/")
    

    加载示例数据

    data <- read.table("demo_pca.txt",header = T,row.names = 1,sep="\t",check.names = F)
    
    # 查看数据
    head(data)
    ##                  C1       C2       C3        M1        M2        M3
    ## AT1G01060 33.035800 35.55930 34.64920 10.294500 13.393800 13.659000
    ## AT1G01320 73.740400 76.31340 69.04050 17.208400 19.669500 17.630300
    ## AT1G01420  6.667900  6.66516  7.94002  2.863710  3.834140  3.432600
    ## AT1G01520  9.588620  8.55635  9.32078  0.982357  2.876840  2.188840
    ## AT1G01970 17.210700 15.13220 18.35080  7.080410  7.748770  7.272220
    ## AT1G02380  0.749953  1.95807  1.44847  0.934699  0.367559  0.703017
    
    # 数据转置,转换成行为样本,列为基因的矩阵
    data <- t(data)
    

    使用prcomp函数进行PCA分析

    data.pca <- prcomp(data)
    # 查看PCA的分析结果
    summary(data.pca)
    ## Importance of components:
    ##                              PC1       PC2       PC3       PC4      PC5
    ## Standard deviation     1.751e+04 1.459e+03 426.25749 328.16697 171.7883
    ## Proportion of Variance 9.921e-01 6.890e-03   0.00059   0.00035   0.0001
    ## Cumulative Proportion  9.921e-01 9.990e-01   0.99956   0.99990   1.0000
    ##                              PC6
    ## Standard deviation     1.461e-11
    ## Proportion of Variance 0.000e+00
    ## Cumulative Proportion  1.000e+00
    
    #----Standard deviation 标准差   其平方为方差=特征值
    #----Proportion of Variance  方差贡献率
    #----Cumulative Proportion  方差累计贡献率
    
    # 绘制主成分的碎石图
    screeplot(data.pca, npcs = 10, type = "lines")
    
    image.png
    # 查看主成分的结果
    head(data.pca$x)
    ##           PC1        PC2       PC3       PC4         PC5           PC6
    ## C1 -13102.671  1379.8802  231.0448  148.3701  255.621964 -1.575094e-11
    ## C2 -15612.730 -1294.4214 -661.6586  137.3621   -1.050406  2.598617e-11
    ## C3 -15985.837  1072.3905  209.1377 -190.1187 -255.774746  1.695406e-11
    ## M1  16558.189  -442.8635  253.4135  517.0651 -100.838108  1.183409e-13
    ## M2  23877.097  1295.0204 -408.9296 -249.7849   24.401970 -4.182591e-11
    ## M3   4265.951 -2010.0062  376.9922 -362.8937   77.639326  1.456456e-11
    

    使用基础plot函数绘制PCA图

    plot(data.pca$x,cex = 2,main = "PCA analysis", 
         col = c(rep("red",3),rep("blue",3)),
         pch = c(rep(16,3),rep(17,3)))
    # 添加分隔线
    abline(h=0,v=0,lty=2,col="gray")
    # 添加标签
    text(data.pca$x,labels = rownames(data.pca$x),pos = 4,offset = 0.6,cex = 1)
    # 添加图例
    legend("bottomright",title = "Sample",inset = 0.01,
           legend = rownames(data.pca$x),
           col = c(rep("red",3),rep("blue",3)),
           pch = c(rep(16,3),rep(17,3)))
    
    image.png

    使用ggplot2包绘制PCA图

    library(ggplot2)
    
    # 查看示例数据
    head(USArrests)
    ##            Murder Assault UrbanPop Rape
    ## Alabama      13.2     236       58 21.2
    ## Alaska       10.0     263       48 44.5
    ## Arizona       8.1     294       80 31.0
    ## Arkansas      8.8     190       50 19.5
    ## California    9.0     276       91 40.6
    ## Colorado      7.9     204       78 38.7
    
    # 使用princomp函数进行PCA分析
    data.pca <- princomp(USArrests,cor = T)
    # 查看PCA的结果
    summary(data.pca)
    ## Importance of components:
    ##                           Comp.1    Comp.2    Comp.3     Comp.4
    ## Standard deviation     1.5748783 0.9948694 0.5971291 0.41644938
    ## Proportion of Variance 0.6200604 0.2474413 0.0891408 0.04335752
    ## Cumulative Proportion  0.6200604 0.8675017 0.9566425 1.00000000
    
    # 绘制主成分碎石图
    screeplot(data.pca,npcs = 6,type = "barplot")
    
    image.png
    #查看主成分的结果
    pca.scores <- as.data.frame(data.pca$scores)
    head(pca.scores)
    ##                Comp.1     Comp.2      Comp.3       Comp.4
    ## Alabama     0.9855659  1.1333924  0.44426879  0.156267145
    ## Alaska      1.9501378  1.0732133 -2.04000333 -0.438583440
    ## Arizona     1.7631635 -0.7459568 -0.05478082 -0.834652924
    ## Arkansas   -0.1414203  1.1197968 -0.11457369 -0.182810896
    ## California  2.5239801 -1.5429340 -0.59855680 -0.341996478
    ## Colorado    1.5145629 -0.9875551 -1.09500699  0.001464887
    
    # 绘制PCA图
    ggplot(pca.scores,aes(Comp.1,Comp.2,col=rownames(pca.scores))) + 
      geom_point(size=3) + 
      geom_text(aes(label=rownames(pca.scores)),vjust = "outward") + 
      geom_hline(yintercept = 0,lty=2,col="red") + 
      geom_vline(xintercept = 0,lty=2,col="blue",lwd=1) +
      theme_bw() + theme(legend.position = "none") + 
      theme(plot.title = element_text(hjust = 0.5)) + 
      labs(x="PCA_1",y="PCA_2",title = "PCA analysis")
    
    image.png

    使用scatterplot3d包绘制三维PCA图

    library(scatterplot3d)
    
    # 加载示例数据
    data <- read.table("demo_pca.txt",header = T,row.names = 1,sep="\t",check.names = F)
    head(data)
    ##                  C1       C2       C3        M1        M2        M3
    ## AT1G01060 33.035800 35.55930 34.64920 10.294500 13.393800 13.659000
    ## AT1G01320 73.740400 76.31340 69.04050 17.208400 19.669500 17.630300
    ## AT1G01420  6.667900  6.66516  7.94002  2.863710  3.834140  3.432600
    ## AT1G01520  9.588620  8.55635  9.32078  0.982357  2.876840  2.188840
    ## AT1G01970 17.210700 15.13220 18.35080  7.080410  7.748770  7.272220
    ## AT1G02380  0.749953  1.95807  1.44847  0.934699  0.367559  0.703017
    
    # 数据转置,转换成行为样本,列为基因的矩阵
    data <- t(data)
    
    # 使用prcomp函数进行PCA分析
    data.pca <- prcomp(data)
    
    # 绘制三维PCA图
    scatterplot3d(data.pca$x[,1:3],
                  pch = c(rep(16,3),rep(17,3)),
                  color= c(rep("red",3),rep("blue",3)),
                  angle=45, main= "3D PCA plot",
                  cex.symbols= 1.5,,mar=c(5, 4, 4, 5))
    # 添加图例
    legend("topright",title = "Sample",
           xpd=TRUE,inset= -0.01,
           legend = rownames(data.pca$x),
           col = c(rep("red",3),rep("blue",3)),
           pch = c(rep(16,3),rep(17,3)))
    
    image.png

    使用factoextra包绘制PCA图

    library(factoextra)
    
    # 查看示例数据
    head(iris)
    ##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    ## 1          5.1         3.5          1.4         0.2  setosa
    ## 2          4.9         3.0          1.4         0.2  setosa
    ## 3          4.7         3.2          1.3         0.2  setosa
    ## 4          4.6         3.1          1.5         0.2  setosa
    ## 5          5.0         3.6          1.4         0.2  setosa
    ## 6          5.4         3.9          1.7         0.4  setosa
    
    # 使用prcomp函数进行PCA分析
    res.pca <- prcomp(iris[, -5],  scale = TRUE)
    res.pca
    ## Standard deviations (1, .., p=4):
    ## [1] 1.7083611 0.9560494 0.3830886 0.1439265
    ##
    ## Rotation (n x k) = (4 x 4):
    ##                     PC1         PC2        PC3        PC4
    ## Sepal.Length  0.5210659 -0.37741762  0.7195664  0.2612863
    ## Sepal.Width  -0.2693474 -0.92329566 -0.2443818 -0.1235096
    ## Petal.Length  0.5804131 -0.02449161 -0.1421264 -0.8014492
    ## Petal.Width   0.5648565 -0.06694199 -0.6342727  0.5235971
    
    #绘制主成分碎石图
    fviz_screeplot(res.pca, addlabels = TRUE)
    
    image.png
    # 可视化PCA分析的结果
    # Graph of individuals
    # +++++++++++++++++++++
    fviz_pca_ind(res.pca, col.ind="cos2", 
                 geom = "point", # show points only
                 gradient.cols = c("white", "#2E9FDF", "#FC4E07" ))
    
    image.png
    fviz_pca_ind(res.pca, label="none", habillage=iris$Species,
                 addEllipses=TRUE, ellipse.level=0.95,
                 palette = c("#999999", "#E69F00", "#56B4E9"))
    
    image.png
    # Graph of variables
    # +++++++++++++++++++++
    # Default plot
    fviz_pca_var(res.pca, col.var = "steelblue")
    
    image.png
    # Control variable colors using their contributions
    fviz_pca_var(res.pca, col.var = "contrib", 
                 gradient.cols = c("white", "blue", "red"),
                 ggtheme = theme_minimal())
    
    image.png
    # Biplot of individuals and variables
    # +++++++++++++++++++++
    # Keep only the labels for variables
    # Change the color by groups, add ellipses
    fviz_pca_biplot(res.pca, label = "var", habillage=iris$Species,
                    addEllipses=TRUE, ellipse.level=0.95,
                    ggtheme = theme_minimal())
    
    image.png
    sessionInfo()
    R version 3.6.0 (2019-04-26)
    Platform: x86_64-w64-mingw32/x64 (64-bit)
    Running under: Windows 10 x64 (build 18363)
    
    Matrix products: default
    
    locale:
    [1] LC_COLLATE=Chinese (Simplified)_China.936 
    [2] LC_CTYPE=Chinese (Simplified)_China.936   
    [3] LC_MONETARY=Chinese (Simplified)_China.936
    [4] LC_NUMERIC=C                              
    [5] LC_TIME=Chinese (Simplified)_China.936    
    
    attached base packages:
    [1] stats     graphics  grDevices utils     datasets  methods   base     
    
    other attached packages:
    [1] FactoMineR_2.3       factoextra_1.0.7     scatterplot3d_0.3-41
    [4] ggplot2_3.2.0       
    
    loaded via a namespace (and not attached):
     [1] Rcpp_1.0.5         pillar_1.4.2       compiler_3.6.0     RColorBrewer_1.1-2
     [5] ggpubr_0.2.1       tools_3.6.0        digest_0.6.20      evaluate_0.14     
     [9] tibble_2.1.3       gtable_0.3.0       lattice_0.20-38    pkgconfig_2.0.2   
    [13] rlang_0.4.7        rstudioapi_0.10    ggrepel_0.8.1      yaml_2.2.0        
    [17] xfun_0.8           knitr_1.23         withr_2.1.2        dplyr_0.8.3       
    [21] cluster_2.0.8      flashClust_1.01-2  grid_3.6.0         tidyselect_0.2.5  
    [25] glue_1.3.1         R6_2.4.0           rmarkdown_1.13     purrr_0.3.2       
    [29] magrittr_1.5       htmltools_0.3.6    scales_1.0.0       leaps_3.1         
    [33] MASS_7.3-51.4      rsconnect_0.8.16   assertthat_0.2.1   colorspace_1.4-1  
    [37] ggsignif_0.5.0     labeling_0.3       lazyeval_0.2.2     munsell_0.5.0     
    [41] crayon_1.3.4    
    

    相关文章

      网友评论

        本文标题:R语言可视化(十六):PCA图绘制

        本文链接:https://www.haomeiwen.com/subject/jsppuktx.html