【R语言与生物信息绘图】

作者: 森尼啊 | 来源:发表于2021-03-05 18:39 被阅读0次

【R语言与生物信息绘图】
R数据可视化|图形初阶（上）
学习小组D4-lz
2020-02-10/11 R语言直播课学习
R语言绘图包02--热图pheatmap
R语言绘图包06--基因表达相关性绘图corrplot
R语言绘图包03--火山图EnhancedVolcano
R语言绘图包05--韦恩图的绘制：ggvenn和VennDiag
R语言绘图包04--GOplot：富集分析结果可视化
富集分析结果雷达图

喜欢这个劳斯，来看他的生信绘图课，每一节就几分钟。
https://www.bilibili.com/video/BV1XJ411m73p

R 绘图系统

基础绘图
2.grid
lattice
ggplot2
【现有python版本python

预习

data()
点图、饼图、条形图、直方图

Rgallery

一个站点，搜集了利用R语言绘制的图，自行浏览、探索，用处很大

条形图/柱状图

m <- read.csv("Rdata/homo_length.csv",header = T)
class(m)
head(m)
x <- m[1:24,]
x
class(x$length)
barplot(height = x$length) #绘图时参数从少到多增加，方便找bug
barplot(height = x$length,names.arg = x$chr)#设置条形图标签，画完发现没显示全
barplot(height = x$length,names.arg = x$chr,las=3) #设置标签方向为竖
#colours() 包含657种颜色
yanse <- sample(colours(),24,replace = F)
barplot(height = x$length,names.arg = x$chr,col = yanse)
barplot(height = x$length,names.arg = x$chr,col = rainbow(7))
# horiz =T ，条形图横过来，beside= T, 并列条形图， =F，堆积的条形图

barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F)
barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F,
        main = "Human chromosome length distribution",xlab = "Chromosome Name",ylab = "Chromosome Length") # 设置标题、x轴，y轴

分组条形图

x <- read.csv("Rdata/sv_distrubution.csv",header = T,row.names = 1)
head(x)
# row.names = 1 ,表示第一列作为行名
x
barplot(x)
barplot(as.matrix(x))#按照突变类型画图的
barplot(t(as.matrix(x)))#改成按照染色体位置
barplot(t(as.matrix(x)),col = rainbow(4))
barplot(t(as.matrix(x)),col = rainbow(4),beside = T)
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x))#barplot可直接在函数内部添加图例
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800))
barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800),
        main = "SV Distribution",xlab="Chromosome Number",ylab="SV Numbers")

直方图

#条形图：离散形， 直方图： 连续型

x <- read.table("Rdata/H37Rv.gff",sep = "\t",header = F,skip = 7,quote = "")#rawdata，基因组注释文件，给出基因组每个位置具体功能；前面7行是注释文件，跳过；quote= "" 表示里面的字符串不用引号括起来。
x <- x[x$V3=="gene",]
x <- abs(x$V5-x$V4+1) #基因的长度；起始位置的不同，取绝对值保险一点
length(x)
range(x)#查看最长最短的基因
hist(x)
hist(x,breaks = 80)# breaks 
hist(x,breaks = c(0,500,1000,1500,2000,2500,15000))
hist(x,breaks = 80,freq = F)# 根据概率密度来画图，而不是默认的频数
hist(x,breaks = 80,density = T)
hist(rivers,density = T,breaks = 10)
?hist
#添加密度先
h=hist(x,nclass=80,col="pink",xlab="Gene Length (bp)",main="Histogram of Gene Length");
h
rug(x);
xfit<-seq(min(x),max(x),length=100);
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x));
yfit <- yfit*diff(h$mids[1:2])*length(x);
lines(xfit, yfit, col="blue", lwd=2);

散点图

m <- read.table("Rdata/prok_representative.txt",sep="\t");
genome_size <- m[,2];
gene_size <- m[,4];
plot(genome_size,gene_size,pch=16,
     xlab="Genome Size",ylab="Genes"); 
#pch选项指定点的参数，0-25，共26种选项。 cex 设置点的大小 
fit <- lm(gene_size ~ genome_size);
summary(fit)

abline( fit,col="blue",lwd=1.8 );
rr <- round( summary(fit)$adj.r.squared,2);
intercept <- round( summary(fit)$coefficients[1],2);
slope <- round( summary(fit)$coefficients[2],2);
eq <- bquote( atop( "y = " * .(slope) * " x + " * .(intercept), R^2 == .(rr) ) )
eq
text(12,6e3,eq);

饼图

#统计学家最不喜欢的图
x <- read.csv("Rdata/homo_length.csv",header = T)
x <- x[1:24,]
barplot(height = x$length,names.arg = x$chr)
pie(x$length/sum(x$length))

#3D饼图
m <- read.table("Rdata/Species.txt");
m
x <- m[,3]
pie(x);
pie(x,col=rainbow(length(x)))
lbls <- paste(m[,1],m[,2],"\n",m[,3],"%" )
pie(x,col=rainbow(length(x)),labels = lbls)
pie(x,col=rainbow(length(x)),labels = lbls)
pie(x,col=rainbow(length(x)),labels = lbls,radius = 1)
pie(x,col=rainbow(length(x)),labels = lbls,radius = 1,cex=0.8)

install.packages("plotrix")
BiocManager::install('plotrix')
library(plotrix)
pie3D(x,col=rainbow(length(x)),labels = lbls)
pie3D(x,col=rainbow(length(x)),labels = lbls,cex=0.8)
pieplot <- pie3D(x,col=rainbow(length(x)),radius = 1,explode = 0.1)#饼图炸开
pie3D.labels(pieplot,labels = lbls,labelcex = 0.8,height = 0.1,labelrad = 1.75)

#扇形图
fan.plot(x,col=rainbow(length(x)),labels = lbls,cex=0.8,radius = 1)

箱线图

boxplot(mpg ~cyl,data = mtcars)
m <- read.csv("Rdata/gene_expression.csv",header = T,row.names = 1)
head(m)
boxplot(m)
boxplot(m,outline=F) #outline=F 是把离群点去掉
install.packages("reshape2")
library(reshape2)
x <- melt(m)
head(x)
boxplot(value ~ variable,data = x) # y~x
boxplot(value ~ variable,data = x,outline=F)

boxplot(len ~ dose, data = ToothGrowth)
boxplot(len ~ dose:supp, data = ToothGrowth)
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"))
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),notch=T)#添加缺口
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),width=c(1,0.5,1,0.5,1,0.5))#设置每个盒子宽度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),varwidth=T)
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),boxwex=0.5)#盒子宽度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),staplewex=0.5)#最大最小值的线的宽度
boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),sep = ":",lex.order = T)#

par()函数

#参数集，用来设置选项，而不是绘图
opar <- par(no.readonly = T)
?par
par()
x <- par()
x$pch#点的形状
par(pch=16)#局部设置优先级大于全局设置；重启R 才回复
opar <- par(no.readonly = T)# 把不是只读的选项输出出来
opar
par(opar)#使用默认的设置

plot(women$height,women$weight)
plot(women$height,women$weight,type = "b",pch=16,col="red",lty=2,lwd=2,
     main = "Main Title",sub = "Sub Title",xlab = "Heigth",
     ylab = "Weight",xlim = c(50,100),ylim = c(100,200),cex=1.5,las=3,
     adj=0.3,bty="c",fg="blue",tck=-0.03,col.main="green",cex.lab=2)
#↑注意每个选项的数值类型；不断操作

颜色

#对比强烈；不张扬；色盲友好
colour()
rgb(0,1,0) #0~255 之间的数字
#FF0000，红色，十六进制
hsv(0,1,0)
hcl(0,1,0)
#颜色转换
rainbow(7)
pie(1:7,col=rainbow(7))
heat.colors(7)
pie(1:7,col=heat.colors(7))
terrain.colors(7)
pie(1:7,col=terrain.colors(7))
topo.colors(7)
pie(1:7,col=topo.colors(7))
cm.colors(7)
pie(1:7,col=cm.colors(7))
gray.colors(7)
pie(1:7,col=gray.colors(7))

# RColorBrewer
display.brewer.all()
library(RColorBrewer)
display.brewer.all()
display.brewer.pal("Set1")
brewer.pal.info
yanse <- brewer.pal(name = "Set1",n=7)
pie(1:7, col = yanse)

热图

#1 heatmap()
#2 gplots ∞¸ heatmap.2()
#3 pheatmap∞¸ pheatmap
#4 ComplexHeatmap∞¸

example("heatmap")
m <- read.csv("Rdata/heatmap.csv",header = T,row.names = 1)
class(m)
x <- as.matrix(m)
heatmap(x)
heatmap(t(x))
# 比较重要的选项 颜色 
heatmap(x,col=c("green","red"))
yanse <- colorRampPalette(c("red","black","green"))(nrow(x))# 渐变色
heatmap(x,col=yanse)
heatmap(x,col=yanse,RowSideColors = yanse)
heatmap(x,col=yanse,ColSideColors = colorRampPalette(c("red","black","green"))(ncol(x)))
heatmap(x,col=yanse,Rowv = NA) # 取消按行聚类
heatmap(x,col=yanse,Rowv = NA,Colv = NA)# 取消按行、按列聚类
heatmap(state.x77)
heatmap(state.x77,scale = "col")#标准化

#install.packages("gplots")
library(gplots)
heatmap.2(x)
example(heatmap.2)
heatmap.2(x)
heatmap.2(x,key = F)#不显示图例
heatmap.2(x,symkey = F)# 图例颜色不对称
heatmap.2(x,symkey = T,density.info = "none")
heatmap.2(x,symkey = T,trace = "none")#不添加竖线
heatmap.2(x,symkey = T,tracecol = "black")#线的颜色为黑
heatmap.2(x,dendrogram = "none")
heatmap.2(x,dendrogram = "row")
heatmap.2(x,dendrogram = "col")

#install.packages("pheatmap")
library(pheatmap) #很美观，现在用的多
example("pheatmap")
pheatmap(x)
annotation_col <- data.frame(CellType=factor(rep(c("N1","T1","N2","T2"),each=5)))
rownames(annotation_col) <- colnames(x)
pheatmap(x,annotation_col = annotation_col)
pheatmap(x,annotation_col = annotation_col,display_numbers = T)#热图上显示数字
pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.2f") #保留两位
pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.1f",number_color = "black")

韦恩图

#集合的计算
#1 venneuler包中的venneuler()-- 比较强大
#2 gplots venn() -比较简单
#3 venn包中的venn()
#4 venndiagram包中的venn.diagram函数 --常用
listA <- read.csv("Rdata/genes_list_A.txt",header=FALSE)
A <- listA$V1
listB <- read.csv("Rdata/genes_list_B.txt",header=FALSE)
B <- listB$V1
listC <- read.csv("Rdata/genes_list_C.txt",header=FALSE)
C <- listC$V1
listD <- read.csv("Rdata/genes_list_D.txt",header=FALSE)
D <- listD$V1
listE <- read.csv("Rdata/genes_list_E.txt",header=FALSE)
E <- listE$V1
length(A);length(B);length(C);length(D);length(E)
library(VennDiagram)
#输入数据是列表；不能再绘图窗口中显示，必须保存到文件中
venn.diagram(list(C = C, D = D),fill = c("yellow","cyan"), cex = 1.5,filename = "venn2.png")# cex 样品的标签放大1.5倍
venn.diagram(list(A = A, C = C, D = D), fill = c("yellow","red","cyan"), cex = 1.5,filename="venn3.png")
venn.diagram(list(A = A, B = B, C = C, D = D), fill = c("yellow","red","cyan","forestgreen"), cex = 1.5,filename="venn4.png")
venn.diagram(list(A = A, B = B, C = C, D = D , E = E ), fill = c("yellow","red","cyan","forestgreen","lightblue"), cex = 1.5,filename="venn5.png")

曼哈顿图

#二维散点图展示大量数据，用于全基因组关联分析GWAS中
#通过曼哈顿图可以比较容易找到差异显著的 snps点
install.packages("qqman")
library(qqman)
library(RColorBrewer)
str(gwasResults)
head(gwasResults)
manhattan(gwasResults)# 默认点是5 和8 
manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = c("blue4", "orange3"), suggestiveline =
            F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))
# 太小的点，也就是p只太小，是离群点，没有实际意义
unique(gwasResults$CHR)
number <- length(unique(gwasResults$CHR))
yanse <- brewer.pal(n = 4,name = "Set1")
manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = yanse, suggestiveline =
            F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))

manhattan(subset(gwasResults,CHR==3))
# 高亮显示部分SNP结果
snpsOfInterest
manhattan(gwasResults, highlight = snpsOfInterest)
# 注释SNP结果
manhattan(gwasResults, annotatePval = 0.001)
manhattan(gwasResults, annotatePval = 0.001, annotateTop = FALSE)

火山图

#RNA-seq中 1.表达量的值>2, 2.校正后的p值，也就是q值<0.05 ,0.0.1
m <- read.csv("Rdata/Deseq2.csv",header = T,row.names = 1)
head(m)
m <- na.omit(m)
plot(m$log2FoldChange,m$padj)
plot(m$log2FoldChange,-1*log10(m$padj))
plot(m$log2FoldChange,-1*log10(m$padj),xlim = c(-10,10),ylim=c(0,100))
#添加颜色；没有col的选项，将数据分成三份，加上颜色
#好神奇
m <- transform(m,padj=-1*log10(m$padj))# 对数据框的列进行修改
down <- m[m$log2FoldChange<=-1,] 
up <- m[m$log2FoldChange>=1,]
no <- m[m$log2FoldChange>-1 & m$log2FoldChange <1,] 

plot(no$log2FoldChange,no$padj,xlim = c(-10,10),ylim=c(0,100),col="blue",pch=16,cex=0.8,main = "Gene Expression",xlab = "log2FoldChange",ylab="-log10(Qvalue)")
#低级绘图命令，就是在已经绘制好的图中加上简单的元素，线、点、文字等
points(up$log2FoldChange,up$padj,col="red",pch=16,cex=0.8)
points(down$log2FoldChange,down$padj,col="green",pch=16,cex=0.8)

GC-depth图

#评估基因组拼接效果；不会放在发表文章中
opar <- par(no.readonly = T)#保留原始设置
par(mfrow=c(2,3)) #按行排列，两行三列，六个图；平均分配空间；不平均--layout
plot(pressure)
m <- read.table("Rdata/GC-depth.txt");
head(m)
nf <- layout(matrix(c(0,2,0,0,1,3),2,3,byrow=T),c(0.5,3,1),c(1,3,0.5),TRUE);
par(mar=c(5,5,0.5,0.5));#反复调整得到
layout.show(3)
par("mar")
par(mar=c(5,5,0.5,0.5));
x <- m[,1];
y <- m[,2];
plot(x,y,xlab='GC Content(%)',ylab='Depth',pch=16,col="#FF000077",xlim=c(0,100),ylim=c(0,max(y)))
hist(x,breaks = 100)
#将hist图保存到变量里
xhist <- hist(x,breaks=100,plot=FALSE);
yhist <- hist(y,breaks=floor(max(y)-0),plot=FALSE);

par(mar=c(0,5,1,1));
barplot(xhist$counts,space=0,xlim=c(0,100) );

par(mar=c(5,0,1,1));
barplot(yhist$counts,space=0,horiz=TRUE,ylim=c(0,max(y) ) )
#难点
#1. layout()进行布局
#2. par()中 mar 进行设置
#3.将绘图内容保存为变量，再从对象中选出数据绘图

COG功能注释

#蛋白质直系同源数据库，包含藻类、细菌，真核生物中21个完整基因组的编码蛋白、系统进化关系分类而成

m <- read.table("Rdata/cog.class.annot.txt",head=T,sep="\t");
head(m)
layout(matrix(c(1,2),nr=1),widths=c(20,13));
layout.show(2)
par( mar=c(3,4,4,1)+0.1 );

class <- c("J","A","K","L","B","D","Y","V","T","M","N","Z","W","U","O","C","G","E","F","H","I","P","Q","R","S");
t <- factor( as.character(m$Code),levels=class )
m <- m[order(t),]

barplot(m$Gene.Number,space=F,col=rainbow(25),ylab="Number of genes",names.arg = m$Code)

l <- c(0,5,15,23,25);
id<- c("INFORMATION STORAGE\nAND PROCESSING","CELLULAR PROCESSES\nAND SIGNALING","METABOLISM","POORLY\nCHARACTERIZED")
abline( v = l[c(-1,-5)]);

for( i in 2:length(l) ){
  text( (l[i-1]+l[i])/2,max(m[,3])*1.1,id[i-1],cex=0.8,xpd=T)
}

par(mar=c(2,0,2,1)+0.1 );
plot(0,0,type="n",xlim=c(0,1),ylim=c(0,26),bty="n",axes=F,xlab="",ylab="")

for( i in 1:length(class) ){
  text(0,26-i+0.5,paste(m$Code[i],m$Functional.Categories[i]),pos=4,cex=1,pty=T)
}
title(main = "COG function classification");

GO功能注释条形图

library(ggplot2)

go <- read.csv("Rdata/go.csv",header = T)
head(go)
go_sort <- go[order(go$Ontology,-go$Percentage),]
m <- go_sort[go_sort$Ontology=="Molecular function",][1:10,]
c <- go_sort[go_sort$Ontology=="Cellular component",][1:10,]
b <- go_sort[go_sort$Ontology=="Biological process",][1:10,]
slimgo <- rbind(b,c,m)

#先将Term转为因子
slimgo$Term=factor(slimgo$Term,levels=slimgo$Term)

colnames(slimgo)
pp <- ggplot(data = slimgo, mapping = aes(x=Term,y=Percentage,fill=Ontology))
pp
pp+geom_bar(stat="identity")#分组式，而不是堆叠式
pp+geom_bar(stat="identity")+coord_flip()
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)#去掉图例
pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)+theme_bw()

kegg功能注释图

library(ggplot2)

pathway <-  read.csv("Rdata/kegg.csv",header=T)
head(pathway)
colnames(pathway)

pp <-  ggplot(data=pathway,mapping = aes(x=richFactor,y=Pathway))
pp
pp + geom_point()
pp + geom_point(aes(size=AvsB))
pp + geom_point(aes(size=AvsB,color=Qvalue))
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")
pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")+theme_bw()

【R语言与生物信息绘图】
喜欢这个劳斯，来看他的生信绘图课，每一节就几分钟。https://www.bilibili.com/video/B...
R数据可视化|图形初阶（上）
在生物信息学中常常会用到R语言来绘制图形以实现数据可视化，本文主要通过plot( )高级绘图函数来介绍R语言绘图的...
学习小组D4-lz
R语言与RStudio R是一种编程语言，适用与绘图与统计。 RStudio 是R语言的IDE 安装路径避免中文 ...
2020-02-10/11 R语言直播课学习
一. R与Rstudio R：编程语言，统计计算和绘图的环境。 R studio： R语言的集成开发环境新建项...
R语言绘图包02--热图pheatmap
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...
R语言绘图包06--基因表达相关性绘图corrplot
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...
R语言绘图包03--火山图EnhancedVolcano
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...
R语言绘图包05--韦恩图的绘制：ggvenn和VennDiag
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...
R语言绘图包04--GOplot：富集分析结果可视化
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...
富集分析结果雷达图
R语言绘图包系列： R语言绘图包01--优秀的拼图包patchwork[https://www.jianshu.c...