美文网首页基因组数据绘图ggplot2绘图
跟着Nature Communications绘制eQTL相关图

跟着Nature Communications绘制eQTL相关图

作者: 小杜的生信筆記 | 来源:发表于2022-08-30 21:13 被阅读0次

导入相关R包

## 导入相关的包
library(ggplot2)
library(RColorBrewer)
library(ggrepel)
library(patchwork)
#
# 设置颜色
cols <- brewer.pal(8,"Set2"

construct dataframe of cis eQTL numbers

df <- data.frame(eqtl <- rep(c("primary", "secondary","tertiary","quaternary"), each = 2),
rtc <- rep(c("no", "yes"), 4),
count <- c((3951-1242), 1242, (528-158), 158, (63-16), 16, 3,0)
)

df$eqtl <- factor(df$eqtl, levels = c("primary", "secondary", "tertiary", "quaternary"))
annotation <- data.frame(
  x = c(1,2,3,4),
  y = c(3951+200,528+200,63+200,3+200),
  label = c("1,242/3,951", "158/528", "16/63", "0/3")
)
df
        label        mean        lower        upper
1         MPV -0.08384950 -0.088530392 -0.079168608
2      Plt_DW -0.05221740 -0.056855387 -0.047579413
3    Plt_crit -0.02656630 -0.031027730 -0.022104870
4     Lympho% -0.01307940 -0.017696905 -0.008461895
5        Mono  0.00994619  0.005400656  0.014491724
6       Retic  0.01156940  0.006906560  0.016232240
7      Retic%  0.01232000  0.007596851  0.017043149
8        WBCC  0.01233040  0.007655428  0.017005372
9        Neut  0.01343360  0.008765194  0.018102006
10    Eosino%  0.01404300  0.009369752  0.018716248
11        Plt  0.01665860  0.012120044  0.021197156
12  HLS_retic  0.01678330  0.012094784  0.021471816
13 HLS_retic%  0.01737100  0.012648008  0.022093992
14  Imm_retic  0.01897520  0.014257441  0.023692959
15        IBD  0.06049690  0.040473021  0.080520779
16         UC  0.06654070  0.041283574  0.091797826
17    Coeliac  0.09257918  0.056132056  0.129026306
18        SLE  0.11332868  0.055923827  0.170733543
19        DM1  0.11796080  0.082692519  0.153229077
20        PBC  0.12044615  0.060241468  0.180650838

Figure 1A

## number of cis eQTL by conditional rank and the proportion with a colocalised
## 简单的堆积图
Figure1A <- ggplot(df, aes(x = eqtl, y = count))+
  geom_col(aes(fill = rtc), width = 0.7)+
  scale_fill_manual(values = cols[c(8,4)]) +
  scale_colour_manual(values = cols[c(8,4)]) +
  theme_bw() + xlab("Conditional eQTL Rank") +
  theme(legend.position = "none", axis.text=element_text(size=12),
        axis.title=element_text(size=14)) +
  geom_text(data=annotation, aes( x=x, y=y, label=label),
            color=cols[c(4)], 
            size=5 , fontface="bold" )

Figure 1B

# Read in eQTL sharing estimates from moloc
moloc.out <- read.table("Figure1B.data.txt", header = F)
## 统计
moloc.out$sig <- 0
moloc.out$sig[which(moloc.out$V3<0.2)] <- 1
moloc.out$sig[which(moloc.out$V3>0.8)] <- 2

cols <- brewer.pal(8,"Set2")
> moloc.out[1:5,]
                V1      V2        V3 sig
1       rs79408265 4050121 0.1489246   1
2         rs140522 2350504 0.9937962   2
3 chr5:133495080:I 2940369 0.9956974   2
4 chr5:133544970:D 1740240 0.1384381   1
5         rs154866  770379 0.9561278   2
Figure1B<-ggplot(moloc.out, aes(x=(1-V3), fill=factor(sig), colour=factor(sig))) + 
  geom_histogram(binwidth=0.012, alpha=0.72) +
  scale_fill_manual(values = cols[c(8,1,2)]) +
  scale_colour_manual(values = cols[c(8,1,2)]) +
  theme_bw() + 
  xlab("Probability eQTL is specific to NK cells") + theme(legend.position = "none") +
  ## 添加左右标签
  annotate("text", x = -0.02, y = 775, label = "Shared", size=10, hjust=0) + ylim(NA,800) +
  annotate("text", x = 1.02, y = 775, label = "Unique", size=10, hjust=1) +
  geom_hline(yintercept=0, color=cols[8]) +
theme(axis.text=element_text(size=12),
      axis.title=element_text(size=14))

Figure1Ca.data

trait   label   coloc   pos h_coloc h_pos   fc  p
GCST005840  LV_Stroke   0   5   1   16  0   1
GCST006907  LV_Stroke   0   5   1   16  0   1
GCST006910  Emb_Stroke  0   16  2   56  0   1
GCST006906  Stroke  0   44  2   143 0   1
GCST005528  JIA 1   47  16  171 0.211574169 1
GCST007236  Breast_Ca   47  1669    115 2147    0.512095934 1
30060_irnt  MCHC    12  324 69  1311    0.692457139 1
3064_irnt   PEF 12  390 69  1631    0.718768944 1
102_irnt    Pulse_rate_auto 26  620 115 2147    0.773490359 1
4080_irnt   SBP 29  683 115 2147    0.783586175 1
GCST007799  Asthma  5   107 21  359 0.789368181 1
30020_irnt  Hb  45  1010    115 2147    0.824038423 1
30030_irnt  HCT 44  955 115 2147    0.853467416 1
21001_irnt  BMI 46  994 115 2147    0.857435636 1
3063_irnt   FEV1    40  832 115 2147    0.892438398 1
3062_irnt   FVC 48  969 115 2147    0.920915269 1
GCST005569  RA  5   79  20  297 0.935980074 1
78_irnt BMD 34  664 115 2147    0.953636423 1
GCST004901  ALS 1   16  3   46  0.956270609 1
GCST003156  SLE 7   168 26  612 0.979953346 1
49_irnt Hip_circ    53  996 115 2147    0.993095408 1
50_irnt Height  116 2147    115 2147    1.009190856 1

Figure1Cb.data

trait   label   coloc   pos h_coloc h_pos   fc  p
GCST005527  Psoriasis   0   8   1   30  0   1
GCST007799  Asthma  0   18  3   64  0   1
GCST003156  SLE 0   24  1   78  0   1
30060_irnt  MCHC    1   71  7   186 0.366433968 0.450903976
30240_irnt  Retic%  5   238 8   311 0.813070805 0.784264603
78_irnt BMD 4   168 8   311 0.923924734 1
GCST007236  Breast_Ca   7   258 8   311 1.056139602 1
50_irnt Height  14  510 8   311 1.06897477  1
21002_irnt  Weight  8   291 8   311 1.070520055 1
30300_irnt  HLS_retic%  7   252 8   311 1.081990156 1
GCST006867  DM2 1   45  3   152 1.128093255 1
4080_irnt   SBP 5   171 8   311 1.140489986 0.77800753
30250_irnt  Retic%  7   235 8   311 1.162501789 0.796597413
3063_irnt   FEV1    6   197 8   311 1.189368671 0.785172208

# Enrichment of shared causal loci between NK cell eQTL
total_enrich <- read.table("Figure1Ca.data.txt", header = T)
## 矫正P值
total_enrich$p.adj <- p.adjust(total_enrich$p, method = "fdr")

unique_enrich <- read.table("Figure1Cb.data.txt", header = T)
unique_enrich$p.adj <- p.adjust(unique_enrich$p, method = "fdr")
#Prepare data for facet plot
unique_enrich <- unique_enrich[order(unique_enrich$p), ]
unique_enrich$threshold <- c(rep(1,4),rep(0,54))
unique_enrich$label <- as.character(unique_enrich$label)
unique_enrich$is_annotate <- c(rep(1,4),rep(0,54))

total_enrich <- total_enrich[order(total_enrich$p), ]
total_enrich$threshold <- c(rep(1,8),rep(0,65))
total_enrich$label <- as.character(total_enrich$label)
total_enrich$is_annotate <- c(rep(1,8),rep(0,65))

total_enrich$expt <- "total"
unique_enrich$expt <- "unique"
facet_enrich <- rbind(total_enrich, unique_enrich)
Figure1C <- ggplot(facet_enrich, aes(x = -log(p,10), y = fc, 
colour=as.factor(threshold), fill=as.factor(threshold))) +
  geom_point(aes(size = pos, alpha = 0.72), pch = 21, 
  show.legend = FALSE) +
  facet_grid(. ~ expt) +
  theme_bw() +
  theme(strip.background = element_rect(color="black", fill=cols[8], size=1.5, linetype="solid"), strip.text.x = element_text(
    size = 12, color = "white", face = "bold.italic"), legend.position = "none", axis.text=element_text(size=12),
    axis.title=element_text(size=14))+
  scale_size(range = c(5,10)) +
  scale_fill_manual(values = cols[c(8,4)]) +
  scale_colour_manual(values = cols[c(8,4)])  +
  ylab("Fold change") +
  xlab("-log10(P)") +
  coord_flip() +
  geom_text_repel( data=subset(facet_enrich, is_annotate==1), aes(label=label), size=4, col = c("black"))

Nature系列的文章,图片配色和排版让人看着就是舒服--

END!!

@小杜的生信筆記 ,主要发表或收录生物信息学的教程,以及基于R的分析和可视化(包括数据分析,图形绘制等);分享感兴趣的文献和学习资料!

相关文章

网友评论

    本文标题:跟着Nature Communications绘制eQTL相关图

    本文链接:https://www.haomeiwen.com/subject/ihlrnrtx.html