pHeatmap

作者: 余绕 | 来源:发表于2023-01-04 14:34 被阅读0次

    1. 加载 R 包

    # Attach the packages used below: tidyverse supplies the dplyr/tibble verbs
    # and the %>% pipe; pheatmap draws the heatmaps.
    library(tidyverse)
    library(pheatmap)
    # NOTE: `rm(list = ls())` was dropped here. It only removes objects from the
    # global environment (attached packages and options are untouched), so it
    # does not give a clean session; restart R when a fresh start is needed.
    

    2.加载数据

    # Restore the objects saved under data/geo-cesc. The code further down
    # relies on `sample_info` and `de_result` existing afterwards; presumably
    # they come from these two files (which file holds which is not visible
    # here -- verify against the script that saved them).
    load("data/geo-cesc/prepare.rdata")
    load("data/geo-cesc/de.rdata")
    

    3.准备画图数据

    # Collect the IDs of every sample whose group is Cancer or Normal.
    # The row names of `sample_info` are first copied into a `sample_id` column
    # so that pull() can return them as a plain vector.
    cancer_normal_samples <- sample_info %>%
      rownames_to_column(var = "sample_id") %>%
      filter(group %in% c("Cancer", "Normal")) %>%
      pull(sample_id)
    > cancer_normal_samples
    [1] "GSM1551311" "GSM1551312" "GSM1551313" "GSM1551314" "GSM1551315" "GSM1551316" "GSM1551317" "GSM1551318"
     [9] "GSM1551319" "GSM1551320" "GSM1551321" "GSM1551322" "GSM1551323" "GSM1551324" "GSM1551325" "GSM1551326"
    [17] "GSM1551327" "GSM1551328" "GSM1551329" "GSM1551330" "GSM1551331" "GSM1551332" "GSM1551333" "GSM1551334"
    [25] "GSM1551411" "GSM1551412" "GSM1551413" "GSM1551414" "GSM1551415" "GSM1551416" "GSM1551417" "GSM1551418"
    [33] "GSM1551419" "GSM1551420" "GSM1551421" "GSM1551422" "GSM1551423" "GSM1551424" "GSM1551425" "GSM1551426"
    [41] "GSM1551427" "GSM1551428" "GSM1551429" "GSM1551430" "GSM1551431" "GSM1551432" "GSM1551433" "GSM1551434"
    [49] "GSM1551435" "GSM1551436" "GSM1551437" "GSM1551438"
    
    # pull() returned a plain vector of sample IDs, so it can be used directly
    # as a row index: keep only the Cancer and Normal rows of the sample table.
    cancer_normal_samples_info <- sample_info[cancer_normal_samples, ]
    
    > head(cancer_normal_samples_info)
                   title  group group_num test1 test2 test3 test4
    GSM1551311 Normal-01 Normal         1 18.41 35.58 41.14 96.38
    GSM1551312 Normal-02 Normal         1  5.69 34.32 81.97 23.74
    GSM1551313 Normal-03 Normal         1  9.65 90.20 79.20 67.77
    GSM1551314 Normal-04 Normal         1  1.24 15.51 51.64 59.80
    GSM1551315 Normal-05 Normal         1 27.78 41.61 43.86 43.42
    GSM1551316 Normal-06 Normal         1 88.55 92.93 15.47 25.86
    > tail(cancer_normal_samples_info)
                   title  group group_num test1 test2 test3 test4
    GSM1551433 Cancer-23 Cancer         5 70.22 21.66  3.23 96.39
    GSM1551434 Cancer-24 Cancer         5 66.16 19.81 15.43 74.87
    GSM1551435 Cancer-25 Cancer         5 28.88 20.80 58.05 38.86
    GSM1551436 Cancer-26 Cancer         5 50.73 20.37 37.71 55.58
    GSM1551437 Cancer-27 Cancer         5 34.48 53.74  1.83 75.22
    GSM1551438 Cancer-28 Cancer         5 84.05 95.46 33.81 57.82
    
    # Build the heatmap input: one row per gene symbol, one column per
    # Cancer/Normal sample, limited to the first 100 genes of `de_result`
    # (presumably already ordered by significance -- TODO confirm).
    top100_de <- de_result %>%
      # all_of() replaces the superseded one_of() and raises an error if any
      # requested sample column is missing from `de_result`.
      dplyr::select(Gene_Symbol, all_of(cancer_normal_samples)) %>%
      # Rows without a gene symbol cannot become row names.
      filter(!is.na(Gene_Symbol)) %>%
      # Keep the first row for each symbol, together with all other columns.
      distinct(Gene_Symbol, .keep_all = TRUE) %>%
      # First 100 rows only.
      dplyr::slice(1:100) %>%
      # Move the gene symbols into the row names so only numeric sample
      # columns remain.
      column_to_rownames(var = "Gene_Symbol")
    
    #最终绘图数据展示,注意如果第一列是基因名字,转换成行名
    > head(top100_de)
              GSM1551311 GSM1551312 GSM1551313 GSM1551314 GSM1551315 GSM1551316 GSM1551317 GSM1551318
    CRNN       14.564888  14.056454  15.257230  14.107240  14.898503  14.253255   14.60548   15.11636
    CRISP3     13.115219   6.753176  14.893816  11.640867  13.667496  14.783084   12.67081   16.29286
    TMPRSS11B  12.832780  10.135652  14.515179  10.892401  13.153797  15.368898   13.49849   14.27766
    SPINK7     13.795566  10.968282  15.021497  11.031661  14.483446  14.257270   12.25095   15.31350
    MAL        14.778703  13.856940  15.678189  14.399882  15.059893  15.026535   15.42890   15.33610
    CDKN2A      1.984084   1.925545   2.627671   2.387346   1.931142   9.789955    2.64514    1.79570
              GSM1551319 GSM1551320 GSM1551321 GSM1551322 GSM1551323 GSM1551324 GSM1551325 GSM1551326
    CRNN       10.683049  15.015768   15.34384  14.999177  14.938214  11.372713  14.957813  14.414759
    CRISP3     11.357619  15.327443   15.54632  15.445171  13.832692  11.975319  15.228619  13.542118
    TMPRSS11B   5.239523  13.966622   14.59732  14.110323  13.489663   9.301619  13.814270  13.468177
    SPINK7      2.656949  14.572917   14.92305  14.807682  14.274013   2.931909  14.327866  13.146851
    MAL        13.359903  15.483915   15.51505  15.111620  15.249427  12.830172  15.377884  15.190483
    CDKN2A     10.100884   2.546179    1.92048   2.376991   7.014672   5.863999   7.018526   2.365393
              GSM1551327 GSM1551328 GSM1551329 GSM1551330 GSM1551331 GSM1551332 GSM1551333 GSM1551334
    CRNN       15.082743   1.405240  12.017268   9.776289   1.819540   14.43467  13.284354  15.235034
    CRISP3     15.741898  13.808530  14.475397  11.584258  10.312848   13.94329  12.676573  13.409766
    TMPRSS11B  14.038087   4.245354  11.565818   6.108587   2.740200   13.10699  12.141673  12.367893
    SPINK7     14.999177   2.692052   4.543542   2.702892   2.670086   13.12342  11.754827  13.472471
    MAL        15.289225  10.800478  13.627822  10.637032   2.870667   14.89382  14.264146  15.616283
    CDKN2A      2.346804   3.703309   2.420727  11.156873   2.310679    2.83676   4.918912   2.977643
              GSM1551411 GSM1551412 GSM1551413 GSM1551414 GSM1551415 GSM1551416 GSM1551417 GSM1551418
    CRNN        1.675121  14.511193  14.690445   1.675573   5.496662   1.701228   1.487401   1.804788
    CRISP3      3.433046   4.965640  13.020786   3.522480   4.073336   3.724118   3.344212   3.172724
    TMPRSS11B   2.984133  13.960905  14.126898   4.177635   8.096307   3.510644   2.649728   2.699373
    SPINK7      2.583642  12.774868  14.079914   3.245765   8.488482   6.015201   1.969648   2.678002
    MAL         2.643330   7.729519  14.425534   8.180750  12.927761   2.917134   7.228786   5.745970
    CDKN2A     11.065370  10.779047   9.132962  12.366948  11.840423  12.432680   9.577612  10.322539
              GSM1551419 GSM1551420 GSM1551421 GSM1551422 GSM1551423 GSM1551424 GSM1551425 GSM1551426
    CRNN        1.772998   1.516639   1.569710   1.518697   1.487942  13.373768   1.490927   4.893573
    CRISP3      3.563199   3.842121   3.547146   3.360418   3.553715   3.447426   7.824966   3.519106
    TMPRSS11B   3.397675   3.489286   2.733866   2.653602   2.708027  12.490550   2.787072   6.134873
    SPINK7      1.966850   6.569915   2.321821   2.343616   2.348236  10.576248   2.327820   7.704565
    MAL         3.494209   3.553481   8.298212   6.559287   7.716378  14.844402   3.639841  12.864211
    CDKN2A      9.972505  12.572880   9.151658   1.676136  11.319855  11.116841   8.970740  11.287819
              GSM1551427 GSM1551428 GSM1551429 GSM1551430 GSM1551431 GSM1551432 GSM1551433 GSM1551434
    CRNN        1.500992   1.489840   1.487663   1.491605   1.541168   7.882657   1.520965   1.526133
    CRISP3      3.357022   3.462346   4.511766   3.483928   3.492802   3.505230   3.378156   3.534880
    TMPRSS11B   2.643432   2.723398   2.693362   5.144463   5.262213   8.845981   2.731175   4.272924
    SPINK7      1.968468   1.977180   1.975370   2.350997  10.060368   6.247993   2.347467   2.347151
    MAL         7.659548   3.645124   3.677631   8.514474  11.696981   4.683359   9.822449   3.792151
    CDKN2A      9.450005   8.284904  11.769030  11.153688   4.731811  11.158452  10.198964  12.098064
              GSM1551435 GSM1551436 GSM1551437 GSM1551438
    CRNN        1.522207   1.547932   4.244296   1.524905
    CRISP3     10.138274   3.479104   6.903048   3.565694
    TMPRSS11B   2.848793   3.037301   3.961122   3.236059
    SPINK7      2.306055   2.347518   2.719931   1.984972
    MAL         6.802059   6.359755   8.135977  12.639992
    CDKN2A     11.592411  11.738051  12.054345  11.748801
    
    3.1 绘图:使用默认参数直接出图
    # Heatmap of the first 20 genes with default settings.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             # Hide the sample names on the columns.
             show_colnames = FALSE)
    
    image.png
    3.2 设置单元格的宽度、高度和字体大小
    # Same heatmap with fixed cell dimensions and a smaller font.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             show_colnames = FALSE,
             # Width and height of each cell.
             cellwidth = 6,
             cellheight = 6,
             # Base font size of the plot.
             fontsize = 6)
    
    image.png
    3.3 设置单元格大小和字体,并将样本(列)聚类切分为两个分支:cutree_cols = 2
    # Fixed cell size and font, with the column clustering cut into two groups.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             show_colnames = FALSE,
             cellwidth = 6,
             cellheight = 6,
             fontsize = 6,
             # Split the samples (columns) into 2 blocks.
             cutree_cols = 2)
    
    image.png
    3.4 加注释annotation_col = dplyr::select(cancer_normal_samples_info, group),
    # Heatmap with a column annotation bar showing each sample's group.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             show_colnames = FALSE,
             cellwidth = 10,
             cellheight = 10,
             fontsize = 6,
             cutree_cols = 2,
             # One-column data frame (`group`) whose row names are the sample
             # IDs, taken from the Cancer/Normal sample table.
             annotation_col = dplyr::select(cancer_normal_samples_info, group),
             # Colour used for each level of `group` in the annotation bar.
             annotation_colors = list(
               group = c("Cancer" = "red",
                         "Normal" = "green")
             )
    )
    
    image.png
    3.4.1 在 3.4 的基础上修改 color bar:colorRampPalette(c("green","white","red"))(10)
    # Annotated heatmap with a custom green-white-red colour scale.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             show_colnames = FALSE,
             cellwidth = 6,
             cellheight = 6,
             fontsize = 6,
             cutree_cols = 2,
             annotation_col = dplyr::select(cancer_normal_samples_info, group),
             annotation_colors = list(
               group = c("Cancer" = "#fc8d59",
                         "Normal" = "#99d594")
             ),
             # 10 colours: one per interval defined by `breaks` below.
             color = colorRampPalette(c("green", "white", "red"))(10),
             # 11 break points from 0 to 20 in steps of 2, i.e. 10 intervals.
             breaks = seq(0, 20, 2),
             # Positions of the ticks on the colour bar (every 2 units).
             legend_breaks = seq(0, 20, 2),
             # Text displayed at those ticks.
             legend_labels = seq(0, 20, 2)
    )
    
    image.png

    3.5 修改 color bar:将颜色数增加到 20,colorRampPalette(c("green","white","red"))(20)

    
    # Annotated heatmap with a finer, 20-step green-white-red colour scale.
    # head() is used instead of [1:20, ] so that a table with fewer than 20
    # rows does not get padded with NA rows.
    pheatmap(head(top100_de, 20),
             show_colnames = FALSE,
             cellwidth = 6,
             cellheight = 6,
             fontsize = 6,
             cutree_cols = 2,
             annotation_col = dplyr::select(cancer_normal_samples_info, group),
             annotation_colors = list(
               group = c("Cancer" = "#fc8d59",
                         "Normal" = "#99d594")
             ),
             # 20 colours need 21 break points (`breaks` must be one element
             # longer than `color`).
             color = colorRampPalette(c("green", "white", "red"))(20),
             # Step of 1 gives 21 break points from 0 to 20. The previous step
             # of 2 gave only 10 intervals, so just the first 10 colours (the
             # green-to-white half) were ever used.
             breaks = seq(0, 20, 1),
             # Colour-bar ticks and their labels stay at every 2 units.
             legend_breaks = seq(0, 20, 2),
             legend_labels = seq(0, 20, 2)
    )
    
    
    
    image.png

    相关文章

      网友评论

          本文标题:pHeatmap

          本文链接:https://www.haomeiwen.com/subject/mbykcdtx.html