美文网首页生信图可视化大全学习计划Za
R语言相关系数计算与可视化(二)

R语言相关系数计算与可视化(二)

作者: 医科研 | 来源:发表于2019-06-26 22:33 被阅读10次

    如何解释相关系数

    相关系数的值在 -1~1之间,1表示强正相关,0不相关,-1表示负相关 相关性矩阵可同时研究多个变量之间的相关性,其结果是一个同时展示一个变量与其它变量的表格
    示例代码

    x表示一个矩阵或者数据框

    #cor(x, method = c("pearson", "kendall", "spearman")) 
    ## 若存在缺失值,用以下代码
    #cor(x, method = "pearson", use = "complete.obs")
    

    简单相关性矩阵的计算

    cor函数计算,但仅仅计算出相关系数,没有pvalue

    library(dplyr)
    ## 
    ## Attaching package: 'dplyr'
    ## The following objects are masked from 'package:stats':
    ## 
    ##     filter, lag
    ## The following objects are masked from 'package:base':
    ## 
    ##     intersect, setdiff, setequal, union
    ## 选择部分变量进入分析
    my_data <- select(mtcars, mpg, disp, hp, drat, wt, qsec)
    head(my_data)
    ##                    mpg disp  hp drat    wt  qsec
    ## Mazda RX4         21.0  160 110 3.90 2.620 16.46
    ## Mazda RX4 Wag     21.0  160 110 3.90 2.875 17.02
    ## Datsun 710        22.8  108  93 3.85 2.320 18.61
    ## Hornet 4 Drive    21.4  258 110 3.08 3.215 19.44
    ## Hornet Sportabout 18.7  360 175 3.15 3.440 17.02
    ## Valiant           18.1  225 105 2.76 3.460 20.22
    ## 计算相关性矩阵
    cor_1 <- round(cor(my_data), 2)
    cor_1
    ##        mpg  disp    hp  drat    wt  qsec
    ## mpg   1.00 -0.85 -0.78  0.68 -0.87  0.42
    ## disp -0.85  1.00  0.79 -0.71  0.89 -0.43
    ## hp   -0.78  0.79  1.00 -0.45  0.66 -0.71
    ## drat  0.68 -0.71 -0.45  1.00 -0.71  0.09
    ## wt   -0.87  0.89  0.66 -0.71  1.00 -0.17
    ## qsec  0.42 -0.43 -0.71  0.09 -0.17  1.00
    

    Hmisc包中的rcorr函数可同时返回相关系数,以及pvalue

    简单版本

    #rcorr(x, type = c("pearson","spearman"))##代码还是非常简单
    
    library("Hmisc")
    ## Loading required package: lattice
    ## Loading required package: survival
    ## Loading required package: Formula
    ## Loading required package: ggplot2
    ## 
    ## Attaching package: 'Hmisc'
    ## The following objects are masked from 'package:dplyr':
    ## 
    ##     src, summarize
    ## The following objects are masked from 'package:base':
    ## 
    ##     format.pval, units
    cor_2 <- rcorr(as.matrix(my_data))
    cor_2
    ##        mpg  disp    hp  drat    wt  qsec
    ## mpg   1.00 -0.85 -0.78  0.68 -0.87  0.42
    ## disp -0.85  1.00  0.79 -0.71  0.89 -0.43
    ## hp   -0.78  0.79  1.00 -0.45  0.66 -0.71
    ## drat  0.68 -0.71 -0.45  1.00 -0.71  0.09
    ## wt   -0.87  0.89  0.66 -0.71  1.00 -0.17
    ## qsec  0.42 -0.43 -0.71  0.09 -0.17  1.00
    ## 
    ## n= 32 
    ## 
    ## 
    ## P
    ##      mpg    disp   hp     drat   wt     qsec  
    ## mpg         0.0000 0.0000 0.0000 0.0000 0.0171
    ## disp 0.0000        0.0000 0.0000 0.0000 0.0131
    ## hp   0.0000 0.0000        0.0100 0.0000 0.0000
    ## drat 0.0000 0.0000 0.0100        0.0000 0.6196
    ## wt   0.0000 0.0000 0.0000 0.0000        0.3389
    ## qsec 0.0171 0.0131 0.0000 0.6196 0.3389
    

    参数解释 r 表示相关性矩阵 n表示矩阵的观测 P表示pvalue,检验水平
    提取相应的值

    str(cor_2)#同样的是一个列表结构
    ## List of 3
    ##  $ r: num [1:6, 1:6] 1 -0.848 -0.776 0.681 -0.868 ...
    ##   ..- attr(*, "dimnames")=List of 2
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##  $ n: int [1:6, 1:6] 32 32 32 32 32 32 32 32 32 32 ...
    ##   ..- attr(*, "dimnames")=List of 2
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##  $ P: num [1:6, 1:6] NA 9.38e-10 1.79e-07 1.78e-05 1.29e-10 ...
    ##   ..- attr(*, "dimnames")=List of 2
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##   .. ..$ : chr [1:6] "mpg" "disp" "hp" "drat" ...
    ##  - attr(*, "class")= chr "rcorr"
    ##提取pvalue
    cor_2$P
    ##               mpg         disp           hp         drat           wt
    ## mpg            NA 9.380328e-10 1.787835e-07 1.776240e-05 1.293958e-10
    ## disp 9.380328e-10           NA 7.142679e-08 5.282022e-06 1.222311e-11
    ## hp   1.787835e-07 7.142679e-08           NA 9.988772e-03 4.145827e-05
    ## drat 1.776240e-05 5.282022e-06 9.988772e-03           NA 4.784260e-06
    ## wt   1.293958e-10 1.222311e-11 4.145827e-05 4.784260e-06           NA
    ## qsec 1.708199e-02 1.314404e-02 5.766253e-06 6.195826e-01 3.388683e-01
    ##              qsec
    ## mpg  1.708199e-02
    ## disp 1.314404e-02
    ## hp   5.766253e-06
    ## drat 6.195826e-01
    ## wt   3.388683e-01
    ## qsec           NA
    

    提取相关系数矩阵

    cor_2$r
    ##             mpg       disp         hp        drat         wt        qsec
    ## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
    ## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
    ## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
    ## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
    ## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
    ## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000
    

    整理数据格式-方便可视化

    该部分内容引用至custom function for convinient formatting of the correlation matrix
    输入参数1:cor_r=相关系数矩阵
    输入参数2:cor_p=pvalue矩阵

    flat_cor_mat <- function(cor_r, cor_p){
      #作者编写了一个用于整理相关性矩阵的函数
      #into a table with 4 columns containing :
        # Column 1 : 行名 (变量1)
        # Column 2 : 列名(变量2)
        # Column 3 : 相关系数
        # Column 4 : pvalue
      library(tidyr)
      library(tibble)
      cor_r <- rownames_to_column(as.data.frame(cor_r), var = "row")##行名变为列
      cor_r <- gather(cor_r, column, cor, -1)##除row列,其它各列gather,将值提取为变量
      cor_p <- rownames_to_column(as.data.frame(cor_p), var = "row")
      cor_p <- gather(cor_p, column, p, -1)
      cor_p_matrix <- left_join(cor_r, cor_p, by = c("row", "column"))##left_join优先保留左侧
      cor_p_matrix##得到矩阵
    }
    

    应用函数示例

    cor_3 <- rcorr(as.matrix(mtcars[, 1:7]))##
    cor_3
    ##        mpg   cyl  disp    hp  drat    wt  qsec
    ## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42
    ## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59
    ## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43
    ## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71
    ## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09
    ## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17
    ## qsec  0.42 -0.59 -0.43 -0.71  0.09 -0.17  1.00
    ## 
    ## n= 32 
    ## 
    ## 
    ## P
    ##      mpg    cyl    disp   hp     drat   wt     qsec  
    ## mpg         0.0000 0.0000 0.0000 0.0000 0.0000 0.0171
    ## cyl  0.0000        0.0000 0.0000 0.0000 0.0000 0.0004
    ## disp 0.0000 0.0000        0.0000 0.0000 0.0000 0.0131
    ## hp   0.0000 0.0000 0.0000        0.0100 0.0000 0.0000
    ## drat 0.0000 0.0000 0.0000 0.0100        0.0000 0.6196
    ## wt   0.0000 0.0000 0.0000 0.0000 0.0000        0.3389
    ## qsec 0.0171 0.0004 0.0131 0.0000 0.6196 0.3389
    my_cor_matrix <- flat_cor_mat(cor_3$r, cor_3$P)
    head(my_cor_matrix)
    ##    row column        cor            p
    ## 1  mpg    mpg  1.0000000           NA
    ## 2  cyl    mpg -0.8521620 6.112688e-10
    ## 3 disp    mpg -0.8475514 9.380328e-10
    ## 4   hp    mpg -0.7761684 1.787835e-07
    ## 5 drat    mpg  0.6811719 1.776240e-05
    ## 6   wt    mpg -0.8676594 1.293958e-10
    

    相关性矩阵的可视化

    symnum()函数,使用符号象征性的表示相关系数 例如一个简单的格式如下:
    symnum(x, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), symbols = c(" “,”.“,”,“,”+“,”*“,”B"), abbr.colnames = TRUE)
    其中x表示相关性矩阵
    cutpoints表示相关系数的界值,0-0.3用空格表示,0.3-06以.表示 symbols符号
    abbr.colnames:列名是否缩写

    cor_4 <- cor(mtcars[1:6])
    symnum(cor_4, abbr.colnames = FALSE)
    ##      mpg cyl disp hp drat wt
    ## mpg  1                      
    ## cyl  +   1                  
    ## disp +   *   1              
    ## hp   ,   +   ,    1         
    ## drat ,   ,   ,    .  1      
    ## wt   +   ,   +    ,  ,    1 
    ## attr(,"legend")
    ## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
    

    corrplot函数的可视化

    简单的形式如下 corrplot(corr, method=“circle”) corr指的是可视化矩阵 method指可视化方法,包括“circle”, “square”, “ellipse”, “number”, “shade”, “color”, “pie”.

    M<-cor(mtcars)
    head(round(M,2))
    ##        mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
    ## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
    ## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
    ## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
    ## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
    ## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
    ## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
    

    circle法

    require(corrplot)
    ## Loading required package: corrplot
    ## corrplot 0.84 loaded
    corrplot(M, method = "circle")
    
    Fig1
    corrplot(M, method = "ellipse")
    
    Fig2
    corrplot(M, method = "pie")
    
    Fig3
    corrplot(M, method = "color")
    
    Fig4
    corrplot(M, method = "number")
    
    Fig5

    输出布局

    “full” (default) :完整矩阵 “upper”: 上三角 “lower”: 下三角

    # upper triangular
    corrplot(M, type = "upper")
    
    Fig6
    #lower triangular
    corrplot(M, type = "lower")
    
    Fig7
    # correlogram with hclust reordering
    corrplot(M, order = "hclust")
    
    Fig8
    corrplot(M, type = "upper", order = "hclust")
    
    Fig9
    # Change background color to lightgreen and color of the circles to darkorange and steel blue
    corrplot(M, type = "upper", order = "hclust", col = c("darkorange", "steelblue"),
             bg = "lightgreen")
    
    Fig9
    # use "colorRampPallete" to obtain contionus color scales
    col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)
    corrplot(M, type = "upper", order = "hclust", col = col)
    
    Fig10
    # Or use "RColorBrewer" package
    library(RColorBrewer)
    corrplot(M, type = "upper", order = "hclust",
             col = brewer.pal(n = 9, name = "PuOr"), bg = "darkgreen")
    
    Fig11
    ## tl.col文字标签颜色设置,tl.srt文字旋转角度
    corrplot(M, type = "upper", order = "hclust", tl.col = "darkblue", tl.srt = 45)
    
    Fig12
    # 标记出设定检验水平下有意义的点
    cor_5 <- rcorr(as.matrix(mtcars))
    M <- cor_5$r
    p_mat <- cor_5$P
    corrplot(M, type = "upper", order = "hclust", 
             p.mat = p_mat, sig.level = 0.01)
    
    Fig13
    # Leave blank on no significant coefficient
    corrplot(M, type = "upper", order = "hclust", 
             p.mat = p_mat, sig.level = 0.05, insig = "blank")
    
    Fig4

    参考资料

    相关文章

      网友评论

        本文标题:R语言相关系数计算与可视化(二)

        本文链接:https://www.haomeiwen.com/subject/jvxkcctx.html