本篇文章给大家带来热图的实操,大家一定要动手实践起来哦!学会了的知识才是自己的!赶紧跟着小易学习起来吧!以微生物组与代谢组联合分析作为示例,带大家复现几种相关性热图!
1. 准备输入数据
1) 物种丰度表(*_otu.tsv),格式如下:
说明:第一列为物种名称,第一行为样本的名称,其他的为对应样本的物种丰度,表格用制表格分割。
2)代谢物含量表(metabolites_*.tsv),格式如下:
说明:第一列为代谢物id(或者名称),第一行为样本的名称,其他的为对应样本的代谢物的含量,表格用制表格分割。
2. 脚本及运行
2.1 相关性聚类热图
用法:
Rscript combine_heatmap.R genus_otu.tsv metabolites_differential.tsv prefix
可视化示例图:
library(psych)
library(vegan)
library(reshape2)
library(ComplexHeatmap)
library(circlize)
options(bitmapType='cairo') #关闭服务器与界面的互动响应
read_data <- function(data){
data <- read.table(data, header=T, row.names=1, sep="\t", comment.char="", quote="", check.names=FALSE)
#check.names=FALSE 保留原始表头
data <- data[which(rowSums(data) >0),] #过滤所有丰度都为0得行
data <- t(data)
return(data)
}
filter_pvalue_row <- function(data, pmax=0.05){#过滤P值均大于0.05的行
r <- c()
for(i in rownames(data)){
if(min(data[i, ]) <= pmax){ #存在P值小于等于0.05的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
filter_rvalue_row <- function(data, rmin=0.6){ #过滤R值均小于0.6的行
r <- c()
for(i in rownames(data)){
if(max(abs(data[i, ])) >= rmin){ #存在R值大于等于0.6的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
mark_pvalue <- function(pvalue){#筛选标注哪些小于0.01小于0.05并标注
if(!is.null(pvalue)){
site1 <- pvalue<0.01
pvalue[site1] <-"**"
site2 <- pvalue >0.01& pvalue <0.05
pvalue[site2] <-"*"
pvalue[!site2&!site1]<-""
}else{
pvalue <- F
}
return(pvalue)
}
filter_pvalue <- function(pvalue, rvalue){
indexs <- which(apply(pvalue,1, min, na.rm=TRUE) <0.05)
pvalue <- pvalue[indexs,]
rvalue <- rvalue[indexs,]
r <-list(pvalue=pvalue, rvalue=rvalue)
return(r)
}
size_picture <- function(data){
rows <- nrow(data) #计算行数
#print(rows)
columns <- length(data[1,]) #计算列数
#print(columns)
if(rows<=20) {
widths <-20
ratio <-1
}elseif(rows<=50) {
widths <-25
ratio <-1.2
}elseif(rows<=80){
widths <-35
ratio <-1.6
}elseif(rows<=100){
widths <-50
ratio <-1.6
}else{
widths <-100
ratio <-1.8
}
if(columns<=8) {
heights <-8
}elseif(columns<=12) {
heights <-10
}elseif(columns<=20) {
heights <-15
}else{
heights <-20
}
r <-list(widths=widths, heights=heights, ratio=ratio)
}
correlation_analysis <- function(data1, data2, prefix){
data1 <- read_data(data1)
data2 <- read_data(data2)
sample_id <- rownames(data1)
data2 <- data2[sample_id, ] #匹配样本
result <- corr.test(data1, data2, method="spearman", adjust="none") #计算相关性 pearson
rvalue <- result$r
pvalue <- result$p
rowpid <- filter_pvalue_row(pvalue,0.05) #获取有P值小于0.05的行
pvalue <- t(pvalue[rowpid,])
colpid <- filter_pvalue_row(pvalue,0.05) #获取有P值小于0.05的列
rvalue <- rvalue[rowpid, colpid]
rowrid <- filter_rvalue_row(rvalue,0.6) #获取有R值小于0.6的行
rvalue <- t(rvalue[rowrid,])
colrid <- filter_rvalue_row(rvalue,0.6) #R值小于0.6的行
rvalue <- rvalue[colrid, ]
pvalue <- pvalue[colrid, rowrid]
r <- filter_pvalue(pvalue, rvalue)
rvalue <- r$rvalue
pvalue <- r$pvalue
result <- melt(rvalue, value.name="cor")
result$pvalue <- as.vector(pvalue)
write.table(result, file=paste(prefix,".correlation_pvalue.xls", sep=""), row.names=FALSE, sep="\t", quote=FALSE, na ="")
data.1<-scale(data1)
data.2<-scale(data2)
rvalue<-t(rvalue)
pvalue <- mark_pvalue(pvalue)
pvalue<-t(pvalue)
data.2<- data.2[,match(colnames(rvalue), colnames(data.2))]
data.1<- t(data.1[,match(rownames(rvalue), colnames(data.1))])
wh <- size_picture(t(data2))
widths <- wh$widths
heights <- wh$heights
ratio <- wh$ratio
col_fun = colorRamp2(c(-1,0,1), c("blue","white","red"))
ha <- rowAnnotation(empty=anno_empty(border=FALSE),
Microbiome=data.1, col=list(Microbiome=col_fun),
show_legend=F, annotation_label="Microbiome", foo = anno_text(rownames(data.1)))
p1 <- Heatmap(rvalue, name="Correlation",
cluster_columns=T, cluster_rows=T, show_row_names=F,
show_heatmap_legend=TRUE, width=unit(widths*ratio,"cm"), height=unit(heights*0.55,"cm"),
column_dend_height=unit(3,"cm"),
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%s",pvalue[i, j]), x, y, gp = gpar(fontsize =10))
}, right_annotation=ha)
p2 <- Heatmap(data.2, name="Metabolome",
column_title="Metabolome",column_title_side="bottom", cluster_columns=F, show_row_names=F, #show_row_names=T 添加样本名称
cluster_rows=F, width=unit(widths*ratio,"cm"), height=unit(heights*0.55,"cm"))
lgd.list= Legend(col_fun=col_fun, title="Microbiome")
ht = p1%v%p2
pdf(paste(prefix,".combine_heatmap.pdf", sep=""), width=widths*1.2, height=heights)
ptemp <- dev.cur()
png(paste(prefix,".combine_heatmap.png", sep=""), width=widths*75, height=heights*60)
dev.control("enable")
draw(ht, annotation_legend_list=lgd.list, column_title="", #不显示标题column_title="Metabolome"
column_title_side="bottom", column_title_gp=gpar(fontface='bold',fontsize=20),
padding=unit(c(2,2,10,2),"mm"))
decorate_annotation("Microbiome", {
grid.text("Microbiome", gp=gpar(fontface='bold', fontsize=20) ,
y=unit(1,"npc") + unit(2,"mm"), just ="bottom")}
)
dev.copy(which=ptemp)
dev.off()
dev.off()
}
2.2 相关性层次聚类热图
用法:
Rscript correlation_heatmap.R metabolites_differential.tsv genus_otu.tsv prefix
可视化示例图:
library(psych)
library(pheatmap)
library(reshape2)
options(bitmapType='cairo') #关闭服务器与界面的互动响应
run_data <- function(data){
data <- read.table(data, header=T, row.names=1, sep="\t", comment.char="",quote="", check.names=FALSE)
#check.names=FALSE 保留原始表头
data <- data[which(rowSums(data) > 0),] #过滤所有丰度都为0得行
data <- t(data)
return(data)
}
filter_pvalue_row <- function(data, pmax=0.05){
#过滤P值均大于0.05的行
r <- c()
for (i in rownames(data)){
if(min(data[i, ]) <= pmax){ #存在P值小于等于0.05的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
filter_rvalue_row <- function(data, rmin=0.6){#过滤R值均小于0.6的行
r <- c()
for (i in rownames(data)){
if(max(abs(data[i, ])) >= rmin){ #存在R值大于等于0.6的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
mark_pvalue <- function(pvalue){
if(!is.null(pvalue)){
site1 <- pvalue<0.01
pvalue[site1] <- "**"
site2 <- pvalue > 0.01& pvalue < 0.05
pvalue[site2] <- "*"
pvalue[!site2&!site1]<- ""
} else{
pvalue <- F
}
return(pvalue)
}
filter_pvalue <- function(pvalue, rvalue){
indexs <- which(apply(pvalue, 1, min, na.rm=TRUE) < 0.05)
pvalue <- pvalue[indexs,]
rvalue <- rvalue[indexs,]
r <- list(pvalue=pvalue, rvalue=rvalue)
return(r)
}
correlation_analysis <- function(data1, data2, prefix){
data1 <- run_data(data1)
data2 <- run_data(data2)
sample_id <- rownames(data1)
data2 <- data2[sample_id, ] #匹配样本
result <- corr.test(data1, data2, method="spearman", adjust="none") #pearson
rvalue <- result$r
pvalue <- result$p
rowpid <- filter_pvalue_row(pvalue, 0.05) #获取有P值小于0.05的行
pvalue <- t(pvalue[rowpid,])
colpid <- filter_pvalue_row(pvalue, 0.05) #获取有P值小于0.05的列
rvalue <- rvalue[rowpid, colpid]
rowrid <- filter_rvalue_row(rvalue, 0.6) #获取有R值小于0.6的行
rvalue <- t(rvalue[rowrid,])
colrid <- filter_rvalue_row(rvalue, 0.6) #R值小于0.6的行
rvalue <- rvalue[colrid, ]
pvalue <- pvalue[colrid, rowrid]
r <- filter_pvalue(pvalue, rvalue)
rvalue <- r$rvalue
pvalue <- r$pvalue
result <- melt(rvalue, value.name="cor")
result$pvalue <- as.vector(pvalue)
write.table(result, file=paste(prefix, ".correlation.xls", sep=""), row.names=FALSE, sep="\t", quote=FALSE, na ="")
pvalue <- mark_pvalue(pvalue)
mycol <- colorRampPalette(c("blue","white","tomato"))(800)
pheatmap(rvalue, scale="none", cluster_row=T, cluster_col=T,
border=NA, display_numbers=pvalue, fontsize_number=12,
number_color = "white", cellwidth=12, cellheight=15,
color=mycol, filename=paste(prefix, ".correlation_heatmap.pdf", sep=""))
pheatmap(rvalue, scale="none", cluster_row=T, cluster_col=T,
border=NA, display_numbers=pvalue, fontsize_number=12,
number_color = "white", cellwidth=12, cellheight=15,
color=mycol, filename=paste(prefix, ".correlation_heatmap.png", sep=""))
}
2.3 代谢物与微生物的相关性热图
用法:Rscript ellipse_heatmap.R metabolites_differential.tsv genus_otu.tsv prefix
可视化示例图:
library(psych)
library(corrplot)
library(reshape2)
options(bitmapType='cairo') #关闭服务器与界面的互动响应
run_data <- function(data){
data <- read.table(data, sep="\t", header=T, row.names=1, check.names=FALSE)
#check.names=FALSE 保留原始表头
data <- data[which(rowSums(data) > 0),] #过滤所有丰度都为0得行
data <- t(data)
return(data)
}
filter_pvalue_row <- function(data, pmax=0.05){#过滤P值均大于0.05的行
r <- c()
for (i in rownames(data)){
if(min(data[i, ]) <= pmax){ #存在P值小于等于0.05的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
filter_rvalue_row <- function(data, rmin=0.6){#过滤R值均小于0.6的行
r <- c()
for (i in rownames(data)){
if(max(abs(data[i, ])) >= rmin){ #存在R值大于等于0.6的行
r <- c(r, i)
}else{
print(i)
}
}
return(r)
}
max_name <- function(samples){ #获取样本的最长名称长度
maxlen <- 0
for(i in samples){
if(maxlen <= nchar(i)){
maxlen <- nchar(i)
}
}
return(maxlen)
}
ellipse_heatmap <- function(data1, data2, prefix){
data1 <- run_data(data1)
data2 <- run_data(data2)
sample_id1 <- rownames(data1) #获取样本名称
sample_id2 <- rownames(data2) #获取样本名称
sample_id <- intersect(sample_id1, sample_id2) #提取共有的样本名称
print(sample_id)
data1 <- data1[sample_id, ]
data2 <- data2[sample_id, ]
#连续数据,正态分布,线性关系,用pearson相关系数是最恰当
#上述任一条件不满足,就用spearman相关系数,不能用pearson相关系数。
result <- corr.test(data1, data2, method="spearman", adjust="none")
pvalue <- result$p
rowpid <- filter_pvalue_row(pvalue, 0.05) #获取有P值小于0.05的行
pvalue <- t(pvalue[rowpid,])
colpid <- filter_pvalue_row(pvalue, 0.05) #获取有P值小于0.05的列
rvalue <- result$r
rvalue <- rvalue[rowpid, colpid]
rowrid <- filter_rvalue_row(rvalue, 0.6) #获取有R值小于0.6的行
rvalue <- t(rvalue[rowrid,])
colrid <- filter_rvalue_row(rvalue, 0.6) #R值小于0.6的行
rvalue <- rvalue[colrid, ]
pvalue <- pvalue[colrid, rowrid]
#rows <- nrow(rvalue) #获取行数
rows <-length(colrid)
rowmaxlen <- max_name(colrid) #最长的行名称
#columns <- length(rvalue[1,]) #获取列数目
columns <- length(rowrid)
colmaxlen <- max_name(rowrid) #最长的列名称
tlcex <- 2
if(rows<=10){
heights <- 6
}else if(rows<=20){
heights <- 14
tlcex <- 1.8
}else if(rows<=50){
heights <- 18
tlcex <- 1.6
}else{
heights <- 22
tlcex <- 1.4
}
heights <- heights + rowmaxlen*0.15
if(columns<=4) {
widths <- 6
}else if(columns<=8) {
widths <- 10
}else {
widths <- 10 + (columns-8)*0.2
}
widths <- widths + colmaxlen*0.15
#col1 = colorRampPalette(colors=c("red", "white", "darkgreen"), space="Lab")
col1 = colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "white",
"cyan", "#007FFF", "blue", "#00007F"))
pdf(paste(prefix, ".ellipse_heatmap.pdf", sep=""), width=widths, height=heights)
a <- dev.cur()
png(paste(prefix, ".ellipse_heatmap.png", sep=""), width=widths*60, height=heights*60)
dev.control("enable")
corrplot(rvalue, p.mat=pvalue, method="ellipse", tl.col="black", tl.cex=tlcex,
insig="label_sig", sig.level=c(0.001, 0.01, 0.05),
pch.col="black", pch.cex=0.8) #不显著相关性系数图块上有X符号, col=col1(10)
#corrplot(rvalue, p.mat=pvalue, method="circle", type="lower",
# tl.col="black", tl.cex=tlcex, tl.srt=45,
# insig="label_sig", sig.level=c(0.001, 0.01, 0.05),
# pch.col="black", pch.cex=0.8) #不显著相关性系数图块上有X符号, col=col1(10)
dev.copy(which=a)
dev.off()
dev.off()
}
网友评论