美文网首页
2021-06-30 火山图

2021-06-30 火山图

作者: 学习生信的小兔子 | 来源:发表于2021-06-30 08:10 被阅读0次

读取数据

rm(list=ls())
library(GEOquery)
library(limma)
library(ggplot2)
library(ggrepel)
library(ggthemes)

ssetwd('D:\\GEO数据挖掘与meta分析\\练习\\【资料】13.火山图(代码)\\火山图(代码)')

#读取DEG for micoarray by limma处理后的数据
x <- read.csv("GSE57691_limmaOut_symbol.csv", row.names = 1)
x$label<- rownames(x)
head(x)
 logFC    AveExpr        t      P.Value    adj.P.Val          B
HBA2  4.166594 -0.7648996 6.359721 2.632198e-06 7.625411e-05  4.7594674
HBB   4.045908 -0.9470568 5.583480 1.530187e-05 2.204782e-04  3.0182836
FOSB  2.531847 -0.5720219 3.287262 3.511204e-03 7.711441e-03 -2.2979304
FCN1  2.507169  0.2643620 6.132292 4.376426e-06 1.056799e-04  4.2564416
IL1B  2.138769  0.5494871 5.626432 1.385743e-05 2.069859e-04  3.1163153
CXCL8 1.913179  0.5359573 4.216517 3.868419e-04 1.429247e-03 -0.1615813
      label
HBA2   HBA2
HBB     HBB
FOSB   FOSB
FCN1   FCN1
IL1B   IL1B
CXCL8 CXCL8

基础版火山图

#筛选差异基因
#设置阈值
logFCcut <- 1.5 #1 2
pvalCut <- 0.01 #0.05 0.001
# 选取p.value < 0.01,且|logFC|>1.5的基因
sum(x$P.Val < pvalCut)
[1] 10608
#deg.p = x[x$P.Val <pvalCut & (x$logFC > logFCcut | x$logFC < -logFCcut),]

#也可以根据adj.P.Val进行筛选
#deg.adj <- x[x$adj.P.Val <pvalCut & (x$logFC > logFCcut | x$logFC < -logFCcut),]

画图

#设置上下调基因颜色
x[,8] <- ifelse((x$P.Value < pvalCut & x$logFC > logFCcut), "red", ifelse((x$P.Value < pvalCut & x$logFC < -logFCcut), "blue","grey30"))#新增一列,标明颜色的属性
#设置点的size
size <- ifelse((x$P.Value < pvalCut & abs(x$logFC) > logFCcut), 4, 2)

#设置x,y轴的最大最小位置
xmin <- (range(x$logFC)[1]-(range(x$logFC)[1]+5))
xmax <- (range(x$logFC)[1]+(5-range(x$logFC)[1]))
ymin <- 0
ymax <- 12
#ymax <- range(-log(x$P.Val))[2]+1

Construct the plot object

p1 <- ggplot(data=x, aes(x=logFC, y=-log10(P.Value), label = label)) +
  geom_point(alpha = 0.6, size=size, colour=x[,8]) +
  scale_color_manual(values = c("lightgrey", "navy", "red")) + 
  
  labs(x=bquote(~Log[2]~"(fold change)"), y=bquote(~-Log[10]~italic("P-value")), title="") + 
  ylim(c(ymin,ymax)) + 
  scale_x_continuous(
    breaks = c(-5,-3, -logFCcut, 0, logFCcut, 3, 5), #刻度线的位置
    labels = c(-5,-3, -logFCcut, 0, logFCcut, 3, 5),
    limits = c(-6, 6) #x轴范围,两侧对称才好看 
  ) 

画阈值分界线

p2 <-  p1+geom_vline(xintercept = logFCcut, color="grey40", linetype="longdash", size=0.5) +
  geom_vline(xintercept = -logFCcut, color="grey40", linetype="longdash", size=0.5) +
  geom_hline(yintercept = -log10(pvalCut), color="grey40", linetype="longdash", size=0.5) +
  
  guides(colour = guide_legend(override.aes = list(shape=16)))
p3 <- p2+theme_bw(base_size = 12, base_family = "Times") +
  theme(legend.position="right",
        panel.grid=element_blank(),
        legend.title = element_blank(),
        legend.text= element_text(face="bold", color="black",family = "Times", size=8),
        plot.title = element_text(hjust = 0.8),
        axis.text.x = element_text(face="bold", color="black", size=15),
        axis.text.y = element_text(face="bold",  color="black", size=15),
        axis.title.x = element_text(face="bold", color="black", size=16),
        axis.title.y = element_text(face="bold",color="black", size=16))

显示 logFC > 3 的基因的基因名

p3 + geom_text_repel(aes(x = logFC, y = -log10(P.Value), 
                         label = ifelse(logFC > 3, rownames(x),"")),
                     colour="darkred", size = 5, box.padding = unit(0.35, "lines"), 
                     point.padding = unit(0.3, "lines"))

突出展示感兴趣的基因

selectedGeneID <- read.table("highlight_gene.txt",header = T)
selectgenes <- x[row.names(x) %in% selectedGeneID$Symbol,]
head(selectgenes)
logFC    AveExpr        t      P.Value    adj.P.Val          B
HBB   4.045908 -0.9470568 5.583480 1.530187e-05 0.0002204782  3.0182836
FOSB  2.531847 -0.5720219 3.287262 3.511204e-03 0.0077114409 -2.2979304
FCN1  2.507169  0.2643620 6.132292 4.376426e-06 0.0001056799  4.2564416
IL1B  2.138769  0.5494871 5.626432 1.385743e-05 0.0002069859  3.1163153
CXCL8 1.913179  0.5359573 4.216517 3.868419e-04 0.0014292472 -0.1615813
UCP2  1.901167  0.2595262 3.925862 7.750142e-04 0.0023467341 -0.8393134
      label  V8
HBB     HBB red
FOSB   FOSB red
FCN1   FCN1 red
IL1B   IL1B red
CXCL8 CXCL8 red
UCP2   UCP2 red

进阶版火山图

logFCcut <- 1 
logFCcut2 <- 1.5 
logFCcut3 <- 2 

pvalCut <- 0.05
pvalCut2 <- 0.01 #for advanced mode
pvalCut3 <- 0.001 #for advanced mode

adjPcut <- 0.05

x <- read.csv("GSE57691_limmaOut_symbol.csv", row.names = 1)
x$label<- rownames(x)
#复杂的setting for color
n1 <- length(x[,1])  #x的行数
cols <- rep("grey30",n1)
names(cols)<- rownames(x)
head(cols)
#HBA2      HBB     FOSB     FCN1     IL1B    CXCL8 
#"grey30" "grey30" "grey30" "grey30" "grey30" "grey30" 
cols[x$P.Value < pvalCut & x$logFC >logFCcut]<- "#FB9A99"
cols[x$P.Value < pvalCut2 & x$logFC > logFCcut2]<- "#ED4F4F"
cols[x$P.Value < pvalCut & x$logFC < -logFCcut]<- "#B2DF8A"
cols[x$P.Value < pvalCut2 & x$logFC < -logFCcut2]<- "#329E3F"
#cols[names(cols)==genelist]<- "red"
color_transparent <- adjustcolor(cols, alpha.f = 0.5)  # set color transparence
x[,8] <- color_transparent  #在x中新增一列颜色的属性
head(x$V8)
[1] "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80"

setting for circle size

n1 <- length(x[,1])
size <- rep(1,n1)
size[x$P.Value < pvalCut & x$logFC > logFCcut]<- 2
size[x$P.Value < pvalCut2 & x$logFC > logFCcut2]<- 4
size[x$P.Value < pvalCut3 & x$logFC > logFCcut3]<- 6
size[x$P.Value < pvalCut & x$logFC < -logFCcut]<- 2
size[x$P.Value < pvalCut2 & x$logFC < -logFCcut2]<- 4
size[x$P.Value < pvalCut3 & x$logFC < -logFCcut3]<- 6

x,y轴的最大最小位置

#x,y轴的最大最小位置
xmin <- (range(x$logFC)[1]-(range(x$logFC)[1]+5))
xmax <- (range(x$logFC)[1]+(5-range(x$logFC)[1]))
ymin <- 0
ymax <- 12
#ymax <- range(-log(x$P.Val))[2]+1
p1 <- ggplot(data=x, aes(x=logFC, y=-log10(P.Value), label = label)) +
  geom_point(alpha = 0.6, size=size, colour=x[,8]) +
  scale_color_manual(values = c("lightgrey", "navy", "red")) + 
  
  labs(x=bquote(~Log[2]~"(fold change)"), y=bquote(~-Log[10]~italic("P-value")), title="") + 
  ylim(c(ymin,ymax)) + 
  scale_x_continuous(
    breaks = c(-5, -3, -logFCcut, 0, logFCcut, 3, 5), #刻度线的位置
    labels = c(-5, -3, -logFCcut, 0, logFCcut, 3, 5),
    limits = c(-6, 6) #x轴范围,两侧对称才好看
  ) +
  #或用下面这行:
  #xlim(c(xmin, xmax)) + 
  
  #画阈值分界线
  geom_vline(xintercept = logFCcut, color="grey40", linetype="longdash", size=0.5) +
  geom_vline(xintercept = -logFCcut, color="grey40", linetype="longdash", size=0.5) +
  geom_hline(yintercept = -log10(pvalCut), color="grey40", linetype="longdash", size=0.5) +
  
  guides(colour = guide_legend(override.aes = list(shape=16)))+
  
  theme_bw(base_size = 12, base_family = "Times") +
  theme(legend.position="right",
        panel.grid=element_blank(),
        legend.title = element_blank(),
        legend.text= element_text(face="bold", color="black",family = "Times", size=8),
        plot.title = element_text(hjust = 0.8),
        axis.text.x = element_text(face="bold", color="black", size=16),
        axis.text.y = element_text(face="bold",  color="black", size=16),
        axis.title.x = element_text(face="bold", color="black", size=16),
        axis.title.y = element_text(face="bold",color="black", size=16))
p1
p1 <- p1 + 
  geom_vline(xintercept = logFCcut2, color="grey40", linetype="longdash", size=0.5) +
  geom_vline(xintercept = -logFCcut2, color="grey40", linetype="longdash", size=0.5) +
  geom_hline(yintercept = -log10(pvalCut2), color="grey40", linetype="longdash", size=0.5)

p1

显示 logFC > 3 的基因的基因名

p1 + geom_text_repel(aes(x = logFC, y = -log10(P.Value), 
                         label = ifelse(logFC > 3, rownames(x),"")),
                     colour="darkred", size = 5, box.padding = unit(0.35, "lines"), 
                     point.padding = unit(0.3, "lines"))

突出展示感兴趣的基因

selectedGeneID <- read.table("highlight_gene.txt",header = T)
selectgenes <- x[row.names(x) %in% selectedGeneID$Symbol,]
head(selectgenes)

p2 <- p1 + 
  # 在感兴趣的基因外面画个黑色圈
  geom_point(data = selectgenes, alpha = 1, size = 4.6, shape = 1, 
             stroke = 1, #圈粗细
             color = "black") +
  
  # 显示感兴趣的基因的基因名
  geom_text_repel(data = selectgenes, 
                  colour="darkred", size = 5, box.padding = unit(0.35, "lines"), 
                  point.padding = unit(0.3, "lines"))
p2

相关文章

  • 2021-06-30 火山图

    读取数据 基础版火山图 画图 Construct the plot object 画阈值分界线 显示 logFC ...

  • 一个比较简洁的火山图作图包

    又是火山图: 火山图。。。。。真的很多了ggplot做火山图---添加任意基因标签|||突出显示标记基因[http...

  • ggplot做对角线火山图---与单细胞差异基因可视化更配哦

    火山图又双叒叕来了,之前做的火山图已经很精美了。 1、绝美!差异基因火山图大全![http://mp.weixin...

  • 火山图

    火山图 library(ggplot2) 4. 画图 r03 = ggplot(data,aes(log2FC,-...

  • 火山图

    画第一张图 画第二张图

  • 火山图

    在分析RNAseq和microarray数据的差异基因的时候,常常用到火山图,需要的数据是包含Fold chang...

  • 火山图

    标准的火山图常用于展示显著差异表达的基因,这里有两个关键词:显著是指P<0.05,差异表达一般我们按照Fold C...

  • 火山图

    参考这篇:R数据可视化1: 火山图

  • 画火山图

    volcano plot 火山图 火山图(Volcano Plot)是做RNA-Seq分析的时候特别常用的一张图,...

  • ggplot2优雅的绘制火山图

    关于火山图,绘制的教程有很多也有不少专门绘制火山图的包,说到底火山图无非就是散点图的变形,本节来介绍如何通过ggp...

网友评论

      本文标题:2021-06-30 火山图

      本文链接:https://www.haomeiwen.com/subject/zvvlultx.html