美文网首页R plotbioinformatics
【R语言与生物信息绘图】

【R语言与生物信息绘图】

作者: 森尼啊 | 来源:发表于2021-03-05 18:39 被阅读0次

    喜欢这个劳斯,来看他的生信绘图课,每一节就几分钟。
    https://www.bilibili.com/video/BV1XJ411m73p

    R 绘图系统

    1. 基础绘图
      2.grid
    2. lattice
    3. ggplot2
      【现有python版本python

    预习

    data()
    点图、饼图、条形图、直方图
    

    Rgallery

    一个站点,搜集了利用R语言绘制的图,自行浏览、探索,用处很大

    条形图/柱状图

    m <- read.csv("Rdata/homo_length.csv",header = T)
    class(m)
    head(m)
    x <- m[1:24,]
    x
    class(x$length)
    barplot(height = x$length) #绘图时参数从少到多增加,方便找bug
    barplot(height = x$length,names.arg = x$chr)#设置条形图标签,画完发现没显示全
    barplot(height = x$length,names.arg = x$chr,las=3) #设置标签方向为竖
    #colours() 包含657种颜色
    yanse <- sample(colours(),24,replace = F)
    barplot(height = x$length,names.arg = x$chr,col = yanse)
    barplot(height = x$length,names.arg = x$chr,col = rainbow(7))
    # horiz =T ,条形图横过来,beside= T, 并列条形图, =F,堆积的条形图
    
    barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F)
    barplot(height = x$length,names.arg = x$chr,col = rainbow(7),border = F,
            main = "Human chromosome length distribution",xlab = "Chromosome Name",ylab = "Chromosome Length") # 设置标题、x轴,y轴
    
    

    分组条形图

    x <- read.csv("Rdata/sv_distrubution.csv",header = T,row.names = 1)
    head(x)
    # row.names = 1 ,表示第一列作为行名
    x
    barplot(x)
    barplot(as.matrix(x))#按照突变类型画图的
    barplot(t(as.matrix(x)))#改成按照染色体位置
    barplot(t(as.matrix(x)),col = rainbow(4))
    barplot(t(as.matrix(x)),col = rainbow(4),beside = T)
    barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x))#barplot可直接在函数内部添加图例
    barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800))
    barplot(t(as.matrix(x)),col = rainbow(4),legend.text = colnames(x),ylim = c(0,800),
            main = "SV Distribution",xlab="Chromosome Number",ylab="SV Numbers")
    

    直方图

    #条形图:离散形, 直方图: 连续型
    
    x <- read.table("Rdata/H37Rv.gff",sep = "\t",header = F,skip = 7,quote = "")#rawdata,基因组注释文件,给出基因组每个位置具体功能;前面7行是注释文件,跳过;quote= "" 表示里面的字符串不用引号括起来。
    x <- x[x$V3=="gene",]
    x <- abs(x$V5-x$V4+1) #基因的长度;起始位置的不同,取绝对值保险一点
    length(x)
    range(x)#查看最长最短的基因
    hist(x)
    hist(x,breaks = 80)# breaks 
    hist(x,breaks = c(0,500,1000,1500,2000,2500,15000))
    hist(x,breaks = 80,freq = F)# 根据概率密度来画图,而不是默认的频数
    hist(x,breaks = 80,density = T)
    hist(rivers,density = T,breaks = 10)
    ?hist
    #添加密度先
    h=hist(x,nclass=80,col="pink",xlab="Gene Length (bp)",main="Histogram of Gene Length");
    h
    rug(x);
    xfit<-seq(min(x),max(x),length=100);
    yfit<-dnorm(xfit,mean=mean(x),sd=sd(x));
    yfit <- yfit*diff(h$mids[1:2])*length(x);
    lines(xfit, yfit, col="blue", lwd=2);
    

    散点图

    m <- read.table("Rdata/prok_representative.txt",sep="\t");
    genome_size <- m[,2];
    gene_size <- m[,4];
    plot(genome_size,gene_size,pch=16,
         xlab="Genome Size",ylab="Genes"); 
    #pch选项指定点的参数,0-25,共26种选项。 cex 设置点的大小 
    fit <- lm(gene_size ~ genome_size);
    summary(fit)
    
    abline( fit,col="blue",lwd=1.8 );
    rr <- round( summary(fit)$adj.r.squared,2);
    intercept <- round( summary(fit)$coefficients[1],2);
    slope <- round( summary(fit)$coefficients[2],2);
    eq <- bquote( atop( "y = " * .(slope) * " x + " * .(intercept), R^2 == .(rr) ) )
    eq
    text(12,6e3,eq);
    

    饼图

    #统计学家最不喜欢的图
    x <- read.csv("Rdata/homo_length.csv",header = T)
    x <- x[1:24,]
    barplot(height = x$length,names.arg = x$chr)
    pie(x$length/sum(x$length))
    
    #3D饼图
    m <- read.table("Rdata/Species.txt");
    m
    x <- m[,3]
    pie(x);
    pie(x,col=rainbow(length(x)))
    lbls <- paste(m[,1],m[,2],"\n",m[,3],"%" )
    pie(x,col=rainbow(length(x)),labels = lbls)
    pie(x,col=rainbow(length(x)),labels = lbls)
    pie(x,col=rainbow(length(x)),labels = lbls,radius = 1)
    pie(x,col=rainbow(length(x)),labels = lbls,radius = 1,cex=0.8)
    
    install.packages("plotrix")
    BiocManager::install('plotrix')
    library(plotrix)
    pie3D(x,col=rainbow(length(x)),labels = lbls)
    pie3D(x,col=rainbow(length(x)),labels = lbls,cex=0.8)
    pieplot <- pie3D(x,col=rainbow(length(x)),radius = 1,explode = 0.1)#饼图炸开
    pie3D.labels(pieplot,labels = lbls,labelcex = 0.8,height = 0.1,labelrad = 1.75)
    
    #扇形图
    fan.plot(x,col=rainbow(length(x)),labels = lbls,cex=0.8,radius = 1)
    

    箱线图

    boxplot(mpg ~cyl,data = mtcars)
    m <- read.csv("Rdata/gene_expression.csv",header = T,row.names = 1)
    head(m)
    boxplot(m)
    boxplot(m,outline=F) #outline=F 是把离群点去掉
    install.packages("reshape2")
    library(reshape2)
    x <- melt(m)
    head(x)
    boxplot(value ~ variable,data = x) # y~x
    boxplot(value ~ variable,data = x,outline=F)
    
    boxplot(len ~ dose, data = ToothGrowth)
    boxplot(len ~ dose:supp, data = ToothGrowth)
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"))
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),notch=T)#添加缺口
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),width=c(1,0.5,1,0.5,1,0.5))#设置每个盒子宽度
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),varwidth=T)
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),boxwex=0.5)#盒子宽度
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),staplewex=0.5)#最大最小值的线的宽度
    boxplot(len ~ dose:supp, data = ToothGrowth,col = c("orange", "red"),sep = ":",lex.order = T)#
    

    par()函数

    #参数集,用来设置选项,而不是绘图
    opar <- par(no.readonly = T)
    ?par
    par()
    x <- par()
    x$pch#点的形状
    par(pch=16)#局部设置优先级大于全局设置;重启R 才回复
    opar <- par(no.readonly = T)# 把不是只读的选项输出出来
    opar
    par(opar)#使用默认的设置
    
    plot(women$height,women$weight)
    plot(women$height,women$weight,type = "b",pch=16,col="red",lty=2,lwd=2,
         main = "Main Title",sub = "Sub Title",xlab = "Heigth",
         ylab = "Weight",xlim = c(50,100),ylim = c(100,200),cex=1.5,las=3,
         adj=0.3,bty="c",fg="blue",tck=-0.03,col.main="green",cex.lab=2)
    #↑注意每个选项的数值类型;不断操作
    

    颜色

    #对比强烈;不张扬;色盲友好
    colour()
    rgb(0,1,0) #0~255 之间的数字
    #FF0000,红色,十六进制
    hsv(0,1,0)
    hcl(0,1,0)
    #颜色转换
    rainbow(7)
    pie(1:7,col=rainbow(7))
    heat.colors(7)
    pie(1:7,col=heat.colors(7))
    terrain.colors(7)
    pie(1:7,col=terrain.colors(7))
    topo.colors(7)
    pie(1:7,col=topo.colors(7))
    cm.colors(7)
    pie(1:7,col=cm.colors(7))
    gray.colors(7)
    pie(1:7,col=gray.colors(7))
    
    # RColorBrewer
    display.brewer.all()
    library(RColorBrewer)
    display.brewer.all()
    display.brewer.pal("Set1")
    brewer.pal.info
    yanse <- brewer.pal(name = "Set1",n=7)
    pie(1:7, col = yanse)
    

    热图

    #1 heatmap()
    #2 gplots ∞¸ heatmap.2()
    #3 pheatmap∞¸ pheatmap
    #4 ComplexHeatmap∞¸
    
    example("heatmap")
    m <- read.csv("Rdata/heatmap.csv",header = T,row.names = 1)
    class(m)
    x <- as.matrix(m)
    heatmap(x)
    heatmap(t(x))
    # 比较重要的选项 颜色 
    heatmap(x,col=c("green","red"))
    yanse <- colorRampPalette(c("red","black","green"))(nrow(x))# 渐变色
    heatmap(x,col=yanse)
    heatmap(x,col=yanse,RowSideColors = yanse)
    heatmap(x,col=yanse,ColSideColors = colorRampPalette(c("red","black","green"))(ncol(x)))
    heatmap(x,col=yanse,Rowv = NA) # 取消按行聚类
    heatmap(x,col=yanse,Rowv = NA,Colv = NA)# 取消按行、按列聚类
    heatmap(state.x77)
    heatmap(state.x77,scale = "col")#标准化
    
    #install.packages("gplots")
    library(gplots)
    heatmap.2(x)
    example(heatmap.2)
    heatmap.2(x)
    heatmap.2(x,key = F)#不显示图例
    heatmap.2(x,symkey = F)# 图例颜色不对称
    heatmap.2(x,symkey = T,density.info = "none")
    heatmap.2(x,symkey = T,trace = "none")#不添加竖线
    heatmap.2(x,symkey = T,tracecol = "black")#线的颜色为黑
    heatmap.2(x,dendrogram = "none")
    heatmap.2(x,dendrogram = "row")
    heatmap.2(x,dendrogram = "col")
    
    #install.packages("pheatmap")
    library(pheatmap) #很美观,现在用的多
    example("pheatmap")
    pheatmap(x)
    annotation_col <- data.frame(CellType=factor(rep(c("N1","T1","N2","T2"),each=5)))
    rownames(annotation_col) <- colnames(x)
    pheatmap(x,annotation_col = annotation_col)
    pheatmap(x,annotation_col = annotation_col,display_numbers = T)#热图上显示数字
    pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.2f") #保留两位
    pheatmap(x,annotation_col = annotation_col,display_numbers = T,number_format = "%.1f",number_color = "black")
    

    韦恩图

    #集合的计算
    #1 venneuler包中的venneuler()-- 比较强大
    #2 gplots venn() -比较简单
    #3 venn包中的venn()
    #4 venndiagram包中的venn.diagram函数 --常用
    listA <- read.csv("Rdata/genes_list_A.txt",header=FALSE)
    A <- listA$V1
    listB <- read.csv("Rdata/genes_list_B.txt",header=FALSE)
    B <- listB$V1
    listC <- read.csv("Rdata/genes_list_C.txt",header=FALSE)
    C <- listC$V1
    listD <- read.csv("Rdata/genes_list_D.txt",header=FALSE)
    D <- listD$V1
    listE <- read.csv("Rdata/genes_list_E.txt",header=FALSE)
    E <- listE$V1
    length(A);length(B);length(C);length(D);length(E)
    library(VennDiagram)
    #输入数据是列表;不能再绘图窗口中显示,必须保存到文件中
    venn.diagram(list(C = C, D = D),fill = c("yellow","cyan"), cex = 1.5,filename = "venn2.png")# cex 样品的标签放大1.5倍
    venn.diagram(list(A = A, C = C, D = D), fill = c("yellow","red","cyan"), cex = 1.5,filename="venn3.png")
    venn.diagram(list(A = A, B = B, C = C, D = D), fill = c("yellow","red","cyan","forestgreen"), cex = 1.5,filename="venn4.png")
    venn.diagram(list(A = A, B = B, C = C, D = D , E = E ), fill = c("yellow","red","cyan","forestgreen","lightblue"), cex = 1.5,filename="venn5.png")
    

    曼哈顿图

    #二维散点图展示大量数据,用于全基因组关联分析GWAS中
    #通过曼哈顿图可以比较容易找到差异显著的 snps点
    install.packages("qqman")
    library(qqman)
    library(RColorBrewer)
    str(gwasResults)
    head(gwasResults)
    manhattan(gwasResults)# 默认点是5 和8 
    manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = c("blue4", "orange3"), suggestiveline =
                F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))
    # 太小的点,也就是p只太小,是离群点,没有实际意义
    unique(gwasResults$CHR)
    number <- length(unique(gwasResults$CHR))
    yanse <- brewer.pal(n = 4,name = "Set1")
    manhattan(gwasResults, main = "Manhattan Plot", ylim = c(0, 6), cex = 0.6,cex.axis = 0.9, col = yanse, suggestiveline =
                F, genomewideline = F,chrlabs = c(1:20, "P", "Q"))
    
    manhattan(subset(gwasResults,CHR==3))
    # 高亮显示部分SNP结果
    snpsOfInterest
    manhattan(gwasResults, highlight = snpsOfInterest)
    # 注释SNP结果
    manhattan(gwasResults, annotatePval = 0.001)
    manhattan(gwasResults, annotatePval = 0.001, annotateTop = FALSE)
    

    火山图

    #RNA-seq中 1.表达量的值>2, 2.校正后的p值,也就是q值<0.05 ,0.0.1
    m <- read.csv("Rdata/Deseq2.csv",header = T,row.names = 1)
    head(m)
    m <- na.omit(m)
    plot(m$log2FoldChange,m$padj)
    plot(m$log2FoldChange,-1*log10(m$padj))
    plot(m$log2FoldChange,-1*log10(m$padj),xlim = c(-10,10),ylim=c(0,100))
    #添加颜色;没有col的选项,将数据分成三份,加上颜色
    #好神奇
    m <- transform(m,padj=-1*log10(m$padj))# 对数据框的列进行修改
    down <- m[m$log2FoldChange<=-1,] 
    up <- m[m$log2FoldChange>=1,]
    no <- m[m$log2FoldChange>-1 & m$log2FoldChange <1,] 
    
    plot(no$log2FoldChange,no$padj,xlim = c(-10,10),ylim=c(0,100),col="blue",pch=16,cex=0.8,main = "Gene Expression",xlab = "log2FoldChange",ylab="-log10(Qvalue)")
    #低级绘图命令,就是在已经绘制好的图中加上简单的元素,线、点、文字等
    points(up$log2FoldChange,up$padj,col="red",pch=16,cex=0.8)
    points(down$log2FoldChange,down$padj,col="green",pch=16,cex=0.8)
    

    GC-depth图

    #评估基因组拼接效果;不会放在发表文章中
    opar <- par(no.readonly = T)#保留原始设置
    par(mfrow=c(2,3)) #按行排列,两行三列,六个图;平均分配空间;不平均--layout
    plot(pressure)
    m <- read.table("Rdata/GC-depth.txt");
    head(m)
    nf <- layout(matrix(c(0,2,0,0,1,3),2,3,byrow=T),c(0.5,3,1),c(1,3,0.5),TRUE);
    par(mar=c(5,5,0.5,0.5));#反复调整得到
    layout.show(3)
    par("mar")
    par(mar=c(5,5,0.5,0.5));
    x <- m[,1];
    y <- m[,2];
    plot(x,y,xlab='GC Content(%)',ylab='Depth',pch=16,col="#FF000077",xlim=c(0,100),ylim=c(0,max(y)))
    hist(x,breaks = 100)
    #将hist图保存到变量里
    xhist <- hist(x,breaks=100,plot=FALSE);
    yhist <- hist(y,breaks=floor(max(y)-0),plot=FALSE);
    
    par(mar=c(0,5,1,1));
    barplot(xhist$counts,space=0,xlim=c(0,100) );
    
    par(mar=c(5,0,1,1));
    barplot(yhist$counts,space=0,horiz=TRUE,ylim=c(0,max(y) ) )
    #难点
    #1. layout()进行布局
    #2. par()中 mar 进行设置
    #3.将绘图内容保存为变量,再从对象中选出数据绘图
    

    COG功能注释

    #蛋白质直系同源数据库,包含藻类、细菌,真核生物中21个完整基因组的编码蛋白、系统进化关系分类而成
    
    m <- read.table("Rdata/cog.class.annot.txt",head=T,sep="\t");
    head(m)
    layout(matrix(c(1,2),nr=1),widths=c(20,13));
    layout.show(2)
    par( mar=c(3,4,4,1)+0.1 );
    
    class <- c("J","A","K","L","B","D","Y","V","T","M","N","Z","W","U","O","C","G","E","F","H","I","P","Q","R","S");
    t <- factor( as.character(m$Code),levels=class )
    m <- m[order(t),]
    
    barplot(m$Gene.Number,space=F,col=rainbow(25),ylab="Number of genes",names.arg = m$Code)
    
    l <- c(0,5,15,23,25);
    id<- c("INFORMATION STORAGE\nAND PROCESSING","CELLULAR PROCESSES\nAND SIGNALING","METABOLISM","POORLY\nCHARACTERIZED")
    abline( v = l[c(-1,-5)]);
    
    for( i in 2:length(l) ){
      text( (l[i-1]+l[i])/2,max(m[,3])*1.1,id[i-1],cex=0.8,xpd=T)
    }
    
    par(mar=c(2,0,2,1)+0.1 );
    plot(0,0,type="n",xlim=c(0,1),ylim=c(0,26),bty="n",axes=F,xlab="",ylab="")
    
    for( i in 1:length(class) ){
      text(0,26-i+0.5,paste(m$Code[i],m$Functional.Categories[i]),pos=4,cex=1,pty=T)
    }
    title(main = "COG function classification");
    

    GO功能注释条形图

    library(ggplot2)
    
    go <- read.csv("Rdata/go.csv",header = T)
    head(go)
    go_sort <- go[order(go$Ontology,-go$Percentage),]
    m <- go_sort[go_sort$Ontology=="Molecular function",][1:10,]
    c <- go_sort[go_sort$Ontology=="Cellular component",][1:10,]
    b <- go_sort[go_sort$Ontology=="Biological process",][1:10,]
    slimgo <- rbind(b,c,m)
    
    #先将Term转为因子
    slimgo$Term=factor(slimgo$Term,levels=slimgo$Term)
    
    colnames(slimgo)
    pp <- ggplot(data = slimgo, mapping = aes(x=Term,y=Percentage,fill=Ontology))
    pp
    pp+geom_bar(stat="identity")#分组式,而不是堆叠式
    pp+geom_bar(stat="identity")+coord_flip()
    pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))
    pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)#去掉图例
    pp+geom_bar(stat="identity")+coord_flip()+scale_x_discrete(limits=rev(levels(slimgo$Term)))+guides(fill=FALSE)+theme_bw()
    

    kegg功能注释图

    library(ggplot2)
    
    pathway <-  read.csv("Rdata/kegg.csv",header=T)
    head(pathway)
    colnames(pathway)
    
    pp <-  ggplot(data=pathway,mapping = aes(x=richFactor,y=Pathway))
    pp
    pp + geom_point()
    pp + geom_point(aes(size=AvsB))
    pp + geom_point(aes(size=AvsB,color=Qvalue))
    pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")
    pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")
    pp + geom_point(aes(size=AvsB,color=Qvalue)) + scale_colour_gradient(low="green",high="red")+labs(title="Top20 of pathway enrichment",x="Rich factor",y="Pathway Name",color="-log10(Qvalue)",size="Gene Numbers")+theme_bw()
    

    相关文章

      网友评论

        本文标题:【R语言与生物信息绘图】

        本文链接:https://www.haomeiwen.com/subject/bxgrqltx.html