读取数据
rm(list=ls())
library(GEOquery)
library(limma)
library(ggplot2)
library(ggrepel)
library(ggthemes)
ssetwd('D:\\GEO数据挖掘与meta分析\\练习\\【资料】13.火山图(代码)\\火山图(代码)')
#读取DEG for micoarray by limma处理后的数据
x <- read.csv("GSE57691_limmaOut_symbol.csv", row.names = 1)
x$label<- rownames(x)
head(x)
logFC AveExpr t P.Value adj.P.Val B
HBA2 4.166594 -0.7648996 6.359721 2.632198e-06 7.625411e-05 4.7594674
HBB 4.045908 -0.9470568 5.583480 1.530187e-05 2.204782e-04 3.0182836
FOSB 2.531847 -0.5720219 3.287262 3.511204e-03 7.711441e-03 -2.2979304
FCN1 2.507169 0.2643620 6.132292 4.376426e-06 1.056799e-04 4.2564416
IL1B 2.138769 0.5494871 5.626432 1.385743e-05 2.069859e-04 3.1163153
CXCL8 1.913179 0.5359573 4.216517 3.868419e-04 1.429247e-03 -0.1615813
label
HBA2 HBA2
HBB HBB
FOSB FOSB
FCN1 FCN1
IL1B IL1B
CXCL8 CXCL8
基础版火山图
#筛选差异基因
#设置阈值
logFCcut <- 1.5 #1 2
pvalCut <- 0.01 #0.05 0.001
# 选取p.value < 0.01,且|logFC|>1.5的基因
sum(x$P.Val < pvalCut)
[1] 10608
#deg.p = x[x$P.Val <pvalCut & (x$logFC > logFCcut | x$logFC < -logFCcut),]
#也可以根据adj.P.Val进行筛选
#deg.adj <- x[x$adj.P.Val <pvalCut & (x$logFC > logFCcut | x$logFC < -logFCcut),]
画图
#设置上下调基因颜色
x[,8] <- ifelse((x$P.Value < pvalCut & x$logFC > logFCcut), "red", ifelse((x$P.Value < pvalCut & x$logFC < -logFCcut), "blue","grey30"))#新增一列,标明颜色的属性
#设置点的size
size <- ifelse((x$P.Value < pvalCut & abs(x$logFC) > logFCcut), 4, 2)
#设置x,y轴的最大最小位置
xmin <- (range(x$logFC)[1]-(range(x$logFC)[1]+5))
xmax <- (range(x$logFC)[1]+(5-range(x$logFC)[1]))
ymin <- 0
ymax <- 12
#ymax <- range(-log(x$P.Val))[2]+1
Construct the plot object
p1 <- ggplot(data=x, aes(x=logFC, y=-log10(P.Value), label = label)) +
geom_point(alpha = 0.6, size=size, colour=x[,8]) +
scale_color_manual(values = c("lightgrey", "navy", "red")) +
labs(x=bquote(~Log[2]~"(fold change)"), y=bquote(~-Log[10]~italic("P-value")), title="") +
ylim(c(ymin,ymax)) +
scale_x_continuous(
breaks = c(-5,-3, -logFCcut, 0, logFCcut, 3, 5), #刻度线的位置
labels = c(-5,-3, -logFCcut, 0, logFCcut, 3, 5),
limits = c(-6, 6) #x轴范围,两侧对称才好看
)
画阈值分界线
p2 <- p1+geom_vline(xintercept = logFCcut, color="grey40", linetype="longdash", size=0.5) +
geom_vline(xintercept = -logFCcut, color="grey40", linetype="longdash", size=0.5) +
geom_hline(yintercept = -log10(pvalCut), color="grey40", linetype="longdash", size=0.5) +
guides(colour = guide_legend(override.aes = list(shape=16)))
p3 <- p2+theme_bw(base_size = 12, base_family = "Times") +
theme(legend.position="right",
panel.grid=element_blank(),
legend.title = element_blank(),
legend.text= element_text(face="bold", color="black",family = "Times", size=8),
plot.title = element_text(hjust = 0.8),
axis.text.x = element_text(face="bold", color="black", size=15),
axis.text.y = element_text(face="bold", color="black", size=15),
axis.title.x = element_text(face="bold", color="black", size=16),
axis.title.y = element_text(face="bold",color="black", size=16))
显示 logFC > 3 的基因的基因名
p3 + geom_text_repel(aes(x = logFC, y = -log10(P.Value),
label = ifelse(logFC > 3, rownames(x),"")),
colour="darkred", size = 5, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"))
突出展示感兴趣的基因
selectedGeneID <- read.table("highlight_gene.txt",header = T)
selectgenes <- x[row.names(x) %in% selectedGeneID$Symbol,]
head(selectgenes)
logFC AveExpr t P.Value adj.P.Val B
HBB 4.045908 -0.9470568 5.583480 1.530187e-05 0.0002204782 3.0182836
FOSB 2.531847 -0.5720219 3.287262 3.511204e-03 0.0077114409 -2.2979304
FCN1 2.507169 0.2643620 6.132292 4.376426e-06 0.0001056799 4.2564416
IL1B 2.138769 0.5494871 5.626432 1.385743e-05 0.0002069859 3.1163153
CXCL8 1.913179 0.5359573 4.216517 3.868419e-04 0.0014292472 -0.1615813
UCP2 1.901167 0.2595262 3.925862 7.750142e-04 0.0023467341 -0.8393134
label V8
HBB HBB red
FOSB FOSB red
FCN1 FCN1 red
IL1B IL1B red
CXCL8 CXCL8 red
UCP2 UCP2 red
进阶版火山图
logFCcut <- 1
logFCcut2 <- 1.5
logFCcut3 <- 2
pvalCut <- 0.05
pvalCut2 <- 0.01 #for advanced mode
pvalCut3 <- 0.001 #for advanced mode
adjPcut <- 0.05
x <- read.csv("GSE57691_limmaOut_symbol.csv", row.names = 1)
x$label<- rownames(x)
#复杂的setting for color
n1 <- length(x[,1]) #x的行数
cols <- rep("grey30",n1)
names(cols)<- rownames(x)
head(cols)
#HBA2 HBB FOSB FCN1 IL1B CXCL8
#"grey30" "grey30" "grey30" "grey30" "grey30" "grey30"
cols[x$P.Value < pvalCut & x$logFC >logFCcut]<- "#FB9A99"
cols[x$P.Value < pvalCut2 & x$logFC > logFCcut2]<- "#ED4F4F"
cols[x$P.Value < pvalCut & x$logFC < -logFCcut]<- "#B2DF8A"
cols[x$P.Value < pvalCut2 & x$logFC < -logFCcut2]<- "#329E3F"
#cols[names(cols)==genelist]<- "red"
color_transparent <- adjustcolor(cols, alpha.f = 0.5) # set color transparence
x[,8] <- color_transparent #在x中新增一列颜色的属性
head(x$V8)
[1] "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80" "#ED4F4F80"
setting for circle size
n1 <- length(x[,1])
size <- rep(1,n1)
size[x$P.Value < pvalCut & x$logFC > logFCcut]<- 2
size[x$P.Value < pvalCut2 & x$logFC > logFCcut2]<- 4
size[x$P.Value < pvalCut3 & x$logFC > logFCcut3]<- 6
size[x$P.Value < pvalCut & x$logFC < -logFCcut]<- 2
size[x$P.Value < pvalCut2 & x$logFC < -logFCcut2]<- 4
size[x$P.Value < pvalCut3 & x$logFC < -logFCcut3]<- 6
x,y轴的最大最小位置
#x,y轴的最大最小位置
xmin <- (range(x$logFC)[1]-(range(x$logFC)[1]+5))
xmax <- (range(x$logFC)[1]+(5-range(x$logFC)[1]))
ymin <- 0
ymax <- 12
#ymax <- range(-log(x$P.Val))[2]+1
p1 <- ggplot(data=x, aes(x=logFC, y=-log10(P.Value), label = label)) +
geom_point(alpha = 0.6, size=size, colour=x[,8]) +
scale_color_manual(values = c("lightgrey", "navy", "red")) +
labs(x=bquote(~Log[2]~"(fold change)"), y=bquote(~-Log[10]~italic("P-value")), title="") +
ylim(c(ymin,ymax)) +
scale_x_continuous(
breaks = c(-5, -3, -logFCcut, 0, logFCcut, 3, 5), #刻度线的位置
labels = c(-5, -3, -logFCcut, 0, logFCcut, 3, 5),
limits = c(-6, 6) #x轴范围,两侧对称才好看
) +
#或用下面这行:
#xlim(c(xmin, xmax)) +
#画阈值分界线
geom_vline(xintercept = logFCcut, color="grey40", linetype="longdash", size=0.5) +
geom_vline(xintercept = -logFCcut, color="grey40", linetype="longdash", size=0.5) +
geom_hline(yintercept = -log10(pvalCut), color="grey40", linetype="longdash", size=0.5) +
guides(colour = guide_legend(override.aes = list(shape=16)))+
theme_bw(base_size = 12, base_family = "Times") +
theme(legend.position="right",
panel.grid=element_blank(),
legend.title = element_blank(),
legend.text= element_text(face="bold", color="black",family = "Times", size=8),
plot.title = element_text(hjust = 0.8),
axis.text.x = element_text(face="bold", color="black", size=16),
axis.text.y = element_text(face="bold", color="black", size=16),
axis.title.x = element_text(face="bold", color="black", size=16),
axis.title.y = element_text(face="bold",color="black", size=16))
p1
p1 <- p1 +
geom_vline(xintercept = logFCcut2, color="grey40", linetype="longdash", size=0.5) +
geom_vline(xintercept = -logFCcut2, color="grey40", linetype="longdash", size=0.5) +
geom_hline(yintercept = -log10(pvalCut2), color="grey40", linetype="longdash", size=0.5)
p1
显示 logFC > 3 的基因的基因名
p1 + geom_text_repel(aes(x = logFC, y = -log10(P.Value),
label = ifelse(logFC > 3, rownames(x),"")),
colour="darkred", size = 5, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"))
突出展示感兴趣的基因
selectedGeneID <- read.table("highlight_gene.txt",header = T)
selectgenes <- x[row.names(x) %in% selectedGeneID$Symbol,]
head(selectgenes)
p2 <- p1 +
# 在感兴趣的基因外面画个黑色圈
geom_point(data = selectgenes, alpha = 1, size = 4.6, shape = 1,
stroke = 1, #圈粗细
color = "black") +
# 显示感兴趣的基因的基因名
geom_text_repel(data = selectgenes,
colour="darkred", size = 5, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"))
p2
网友评论