1. 加载 R 包
library(tidyverse)
library(pheatmap)
# Start from an empty workspace: removes every object that ls() lists in the
# current environment.
# NOTE(review): this does not detach packages or reset options; restarting the
# R session is the cleaner way to get a fresh state.
rm(list = ls())
2.加载数据
# Restore the objects saved in the two .rdata files.
# NOTE(review): presumably these provide sample_info and de_result, which the
# code below relies on -- confirm against the script that saved them.
load("data/geo-cesc/prepare.rdata")
load("data/geo-cesc/de.rdata")
3.准备画图数据
# Collect the IDs of every sample whose group is Cancer or Normal.
# The row names of sample_info are first copied into a sample_id column;
# pull() then returns that column as a plain vector.
cancer_normal_samples <- sample_info %>%
  rownames_to_column(var = "sample_id") %>%
  filter(group %in% c("Cancer", "Normal")) %>%
  pull(sample_id)
> cancer_normal_samples
[1] "GSM1551311" "GSM1551312" "GSM1551313" "GSM1551314" "GSM1551315" "GSM1551316" "GSM1551317" "GSM1551318"
[9] "GSM1551319" "GSM1551320" "GSM1551321" "GSM1551322" "GSM1551323" "GSM1551324" "GSM1551325" "GSM1551326"
[17] "GSM1551327" "GSM1551328" "GSM1551329" "GSM1551330" "GSM1551331" "GSM1551332" "GSM1551333" "GSM1551334"
[25] "GSM1551411" "GSM1551412" "GSM1551413" "GSM1551414" "GSM1551415" "GSM1551416" "GSM1551417" "GSM1551418"
[33] "GSM1551419" "GSM1551420" "GSM1551421" "GSM1551422" "GSM1551423" "GSM1551424" "GSM1551425" "GSM1551426"
[41] "GSM1551427" "GSM1551428" "GSM1551429" "GSM1551430" "GSM1551431" "GSM1551432" "GSM1551433" "GSM1551434"
[49] "GSM1551435" "GSM1551436" "GSM1551437" "GSM1551438"
# Note: pull() in the pipeline above returns a plain vector.
# Subset sample_info by row name to keep only the Cancer and Normal samples.
cancer_normal_samples_info <- sample_info[cancer_normal_samples, ]
> head(cancer_normal_samples_info)
title group group_num test1 test2 test3 test4
GSM1551311 Normal-01 Normal 1 18.41 35.58 41.14 96.38
GSM1551312 Normal-02 Normal 1 5.69 34.32 81.97 23.74
GSM1551313 Normal-03 Normal 1 9.65 90.20 79.20 67.77
GSM1551314 Normal-04 Normal 1 1.24 15.51 51.64 59.80
GSM1551315 Normal-05 Normal 1 27.78 41.61 43.86 43.42
GSM1551316 Normal-06 Normal 1 88.55 92.93 15.47 25.86
> tail(cancer_normal_samples_info)
title group group_num test1 test2 test3 test4
GSM1551433 Cancer-23 Cancer 5 70.22 21.66 3.23 96.39
GSM1551434 Cancer-24 Cancer 5 66.16 19.81 15.43 74.87
GSM1551435 Cancer-25 Cancer 5 28.88 20.80 58.05 38.86
GSM1551436 Cancer-26 Cancer 5 50.73 20.37 37.71 55.58
GSM1551437 Cancer-27 Cancer 5 34.48 53.74 1.83 75.22
GSM1551438 Cancer-28 Cancer 5 84.05 95.46 33.81 57.82
# Build the matrix-like data frame used for the heatmaps: one row per gene
# symbol, one column per Cancer/Normal sample.
# NOTE(review): "top 100" assumes de_result is already ordered by significance
# -- confirm against the step that produced de.rdata.
top100_de <- de_result %>%
  # all_of() replaces the superseded one_of(): it raises an error when a
  # sample column is missing instead of dropping it with only a warning.
  # dplyr:: is spelled out to match the explicit dplyr::slice / dplyr::select
  # calls used elsewhere in this file.
  dplyr::select(Gene_Symbol, all_of(cancer_normal_samples)) %>%
  # Drop rows without a gene symbol.
  filter(!is.na(Gene_Symbol)) %>%
  # Keep the first row of each gene symbol; .keep_all keeps the sample columns.
  distinct(Gene_Symbol, .keep_all = TRUE) %>%
  # Take rows 1 to 100.
  dplyr::slice(1:100) %>%
  # Move the gene symbols from the first column into the row names.
  column_to_rownames(var = "Gene_Symbol")
# The final plotting data is shown below. Note: when the first column holds the
# gene names it has to be converted into row names, as done above.
> head(top100_de)
GSM1551311 GSM1551312 GSM1551313 GSM1551314 GSM1551315 GSM1551316 GSM1551317 GSM1551318
CRNN 14.564888 14.056454 15.257230 14.107240 14.898503 14.253255 14.60548 15.11636
CRISP3 13.115219 6.753176 14.893816 11.640867 13.667496 14.783084 12.67081 16.29286
TMPRSS11B 12.832780 10.135652 14.515179 10.892401 13.153797 15.368898 13.49849 14.27766
SPINK7 13.795566 10.968282 15.021497 11.031661 14.483446 14.257270 12.25095 15.31350
MAL 14.778703 13.856940 15.678189 14.399882 15.059893 15.026535 15.42890 15.33610
CDKN2A 1.984084 1.925545 2.627671 2.387346 1.931142 9.789955 2.64514 1.79570
GSM1551319 GSM1551320 GSM1551321 GSM1551322 GSM1551323 GSM1551324 GSM1551325 GSM1551326
CRNN 10.683049 15.015768 15.34384 14.999177 14.938214 11.372713 14.957813 14.414759
CRISP3 11.357619 15.327443 15.54632 15.445171 13.832692 11.975319 15.228619 13.542118
TMPRSS11B 5.239523 13.966622 14.59732 14.110323 13.489663 9.301619 13.814270 13.468177
SPINK7 2.656949 14.572917 14.92305 14.807682 14.274013 2.931909 14.327866 13.146851
MAL 13.359903 15.483915 15.51505 15.111620 15.249427 12.830172 15.377884 15.190483
CDKN2A 10.100884 2.546179 1.92048 2.376991 7.014672 5.863999 7.018526 2.365393
GSM1551327 GSM1551328 GSM1551329 GSM1551330 GSM1551331 GSM1551332 GSM1551333 GSM1551334
CRNN 15.082743 1.405240 12.017268 9.776289 1.819540 14.43467 13.284354 15.235034
CRISP3 15.741898 13.808530 14.475397 11.584258 10.312848 13.94329 12.676573 13.409766
TMPRSS11B 14.038087 4.245354 11.565818 6.108587 2.740200 13.10699 12.141673 12.367893
SPINK7 14.999177 2.692052 4.543542 2.702892 2.670086 13.12342 11.754827 13.472471
MAL 15.289225 10.800478 13.627822 10.637032 2.870667 14.89382 14.264146 15.616283
CDKN2A 2.346804 3.703309 2.420727 11.156873 2.310679 2.83676 4.918912 2.977643
GSM1551411 GSM1551412 GSM1551413 GSM1551414 GSM1551415 GSM1551416 GSM1551417 GSM1551418
CRNN 1.675121 14.511193 14.690445 1.675573 5.496662 1.701228 1.487401 1.804788
CRISP3 3.433046 4.965640 13.020786 3.522480 4.073336 3.724118 3.344212 3.172724
TMPRSS11B 2.984133 13.960905 14.126898 4.177635 8.096307 3.510644 2.649728 2.699373
SPINK7 2.583642 12.774868 14.079914 3.245765 8.488482 6.015201 1.969648 2.678002
MAL 2.643330 7.729519 14.425534 8.180750 12.927761 2.917134 7.228786 5.745970
CDKN2A 11.065370 10.779047 9.132962 12.366948 11.840423 12.432680 9.577612 10.322539
GSM1551419 GSM1551420 GSM1551421 GSM1551422 GSM1551423 GSM1551424 GSM1551425 GSM1551426
CRNN 1.772998 1.516639 1.569710 1.518697 1.487942 13.373768 1.490927 4.893573
CRISP3 3.563199 3.842121 3.547146 3.360418 3.553715 3.447426 7.824966 3.519106
TMPRSS11B 3.397675 3.489286 2.733866 2.653602 2.708027 12.490550 2.787072 6.134873
SPINK7 1.966850 6.569915 2.321821 2.343616 2.348236 10.576248 2.327820 7.704565
MAL 3.494209 3.553481 8.298212 6.559287 7.716378 14.844402 3.639841 12.864211
CDKN2A 9.972505 12.572880 9.151658 1.676136 11.319855 11.116841 8.970740 11.287819
GSM1551427 GSM1551428 GSM1551429 GSM1551430 GSM1551431 GSM1551432 GSM1551433 GSM1551434
CRNN 1.500992 1.489840 1.487663 1.491605 1.541168 7.882657 1.520965 1.526133
CRISP3 3.357022 3.462346 4.511766 3.483928 3.492802 3.505230 3.378156 3.534880
TMPRSS11B 2.643432 2.723398 2.693362 5.144463 5.262213 8.845981 2.731175 4.272924
SPINK7 1.968468 1.977180 1.975370 2.350997 10.060368 6.247993 2.347467 2.347151
MAL 7.659548 3.645124 3.677631 8.514474 11.696981 4.683359 9.822449 3.792151
CDKN2A 9.450005 8.284904 11.769030 11.153688 4.731811 11.158452 10.198964 12.098064
GSM1551435 GSM1551436 GSM1551437 GSM1551438
CRNN 1.522207 1.547932 4.244296 1.524905
CRISP3 10.138274 3.479104 6.903048 3.565694
TMPRSS11B 2.848793 3.037301 3.961122 3.236059
SPINK7 2.306055 2.347518 2.719931 1.984972
MAL 6.802059 6.359755 8.135977 12.639992
CDKN2A 11.592411 11.738051 12.054345 11.748801
3.1 绘图:使用默认参数直接出图
# Heatmap of the first 20 genes, using pheatmap's defaults for everything
# except the column labels.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE # hide the sample (column) names
)
![](https://img.haomeiwen.com/i22929949/30d09045c50d29c9.png)
image.png
3.2 设置单元格的宽度、高度和字体大小
# Same heatmap with fixed cell dimensions and a smaller font.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE, # hide the sample (column) names
  cellwidth = 6,         # width of each cell
  cellheight = 6,        # height of each cell
  fontsize = 6           # base font size of the plot
)
![](https://img.haomeiwen.com/i22929949/c5f62206beb20d2c.png)
image.png
3.3 在 3.2 的基础上,用 cutree_cols = 2 把样本(列)的聚类树切分成两个分支
# Fixed cell size and font, with the column dendrogram cut into two clusters.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE,
  cellwidth = 6,
  cellheight = 6,
  fontsize = 6,
  cutree_cols = 2 # split the samples (columns) into 2 blocks
)
![](https://img.haomeiwen.com/i22929949/0bae0f3e4f6fea44.png)
image.png
3.4 添加列注释:annotation_col = dplyr::select(cancer_normal_samples_info, group)
# Heatmap with a column annotation track showing each sample's group.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE,
  cellwidth = 10,
  cellheight = 10,
  fontsize = 6,
  cutree_cols = 2,
  # One-column data frame (group) whose row names are the sample IDs.
  annotation_col = dplyr::select(cancer_normal_samples_info, group),
  # Colour assigned to each level of the group annotation.
  annotation_colors = list(
    group = c("Cancer" = "red", "Normal" = "green")
  )
)
![](https://img.haomeiwen.com/i22929949/f9f915e85b50410f.png)
image.png
3.4 修改color bar; colorRampPalette(c("green","white","red"))(10)
# Annotated heatmap with a custom green-white-red colour bar of 10 colours.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE,
  cellwidth = 6,
  cellheight = 6,
  fontsize = 6,
  cutree_cols = 2,
  annotation_col = dplyr::select(cancer_normal_samples_info, group),
  annotation_colors = list(
    group = c("Cancer" = "#fc8d59", "Normal" = "#99d594")
  ),
  # 10 colours: one per interval defined by breaks below.
  color = colorRampPalette(c("green", "white", "red"))(10),
  # Boundaries from 0 (minimum) to 20 (maximum) in steps of 2,
  # i.e. 20 / 2 = 10 intervals.
  breaks = seq(0, 20, 2),
  # Positions on the colour bar that get a tick, one every 2 units.
  legend_breaks = seq(0, 20, 2),
  # Text displayed at those ticks.
  legend_labels = seq(0, 20, 2)
)
![](https://img.haomeiwen.com/i22929949/fd9af439d20734e7.png)
image.png
3.5 修改color bar; colorRampPalette(c("green","white","red"))(20)
# Annotated heatmap with a finer green-white-red colour bar of 20 colours.
pheatmap(
  top100_de[1:20, ],
  show_colnames = FALSE,
  cellwidth = 6,
  cellheight = 6,
  fontsize = 6,
  cutree_cols = 2,
  annotation_col = dplyr::select(cancer_normal_samples_info, group),
  annotation_colors = list(
    group = c("Cancer" = "#fc8d59", "Normal" = "#99d594")
  ),
  # 20 colours: one per interval defined by breaks below.
  color = colorRampPalette(c("green", "white", "red"))(20),
  # breaks must be exactly one element longer than color. With a step of 2
  # there were only 10 intervals, so just the first 10 colours (green to
  # near-white) were ever used. A step of 1 gives 21 boundaries = 20 intervals.
  breaks = seq(0, 20, 1),
  # Legend ticks stay at every 2 units to keep the colour bar readable.
  legend_breaks = seq(0, 20, 2),
  # Text displayed at those ticks.
  legend_labels = seq(0, 20, 2)
)
![](https://img.haomeiwen.com/i22929949/e6ab0331059fe5ab.png)
image.png
网友评论