美文网首页IMP researchR for statisticsbioinformatics
跟着Nature学作图:R语言ggplot2散点栅格化能够减小输

跟着Nature学作图:R语言ggplot2散点栅格化能够减小输

作者: 小明的数据分析笔记本 | 来源:发表于2023-01-10 21:00 被阅读0次

    论文

    A saturated map of common genetic variants associated with human height

    https://www.nature.com/articles/s41586-022-05275-y

    s41586-022-05275-y.pdf

    代码没有公开,但是作图数据基本都公开了,争取把每个图都重复一遍

    今天的推文重复论文中的Figure1

    代码

    # Work from the folder that holds the figure source data; relative paths
    # used later in the script resolve against this directory.
    setwd("data/20221014")
    library(readxl)
    # Load the Figure 1 source table and list its column names.
    fig1 <- read_excel(path = "Figure1.xlsx")
    names(fig1)
    
    library(tidyverse)
    library(stringr)
    
    # Sanity check of the label parsing on one literal category: strip the
    # brackets and the "5e-" prefix, split on the comma, and convert each half
    # to a number. The first call prints the part before the comma, the second
    # the part after it.
    as.numeric(
      str_split_fixed(
        str_replace_all("[0,5e-100]", "\\(|5e-|\\]|\\[", ""),
        ",",
        n = 2
      )[, 1]
    )
    as.numeric(
      str_split_fixed(
        str_replace_all("[0,5e-100]", "\\(|5e-|\\]|\\[", ""),
        ",",
        n = 2
      )[, 2]
    )
    # Parse every `P-value Caregory` label into its two numeric halves once,
    # instead of repeating the replace/split pipeline for each derived column.
    # Stripping the brackets and the "5e-" prefix leaves two comma-separated
    # numbers, e.g. "[0,5e-100]" becomes "0,100".
    category_bounds <- fig1$`P-value Caregory` %>%
      str_replace_all("\\(|5e-|\\]|\\[", "") %>%
      str_split_fixed(",", n = 2)
    
    # max_value is the number before the comma and min_value the one after it;
    # each (min_value, max_value) pair is then mapped to a plotting group.
    # Labels matching none of the pairs below get an NA group.
    new.fig1 <- fig1 %>%
      mutate(
        max_value = as.numeric(category_bounds[, 1]),
        min_value = as.numeric(category_bounds[, 2])
      ) %>%
      mutate(group = case_when(
        min_value == 100 & max_value == 0 ~ "group01",
        min_value == 50 & max_value == 100 ~ "group02",
        min_value == 20 & max_value == 50 ~ "group03",
        min_value == 10 & max_value == 20 ~ "group04",
        min_value == 8 & max_value == 10 ~ "group05"
      ))
    
    # Count rows per group; useNA makes any unmatched label visible instead of
    # silently dropping it from the table.
    table(new.fig1$group, useNA = "ifany")
    
    library(ggplot2)
    library(ggh4x)
    library(cowplot)
    
    # Scatter plot of effect size against minor allele frequency, coloured by
    # P-value group. The plot is stored as `p1` because the PDF export further
    # down refers to it by that name; it is printed right after so it is still
    # displayed here.
    p1 <- ggplot(
      data = new.fig1,
      aes(
        x = `Minor Allele Frequency`,
        y = `Join Effect of Minor Allele`,
        color = group
      )
    ) +
      # Legend keys are drawn as filled rectangles rather than points.
      geom_point(key_glyph = rectangle_key_glyph(
        color = color,
        fill = color,
        padding = margin(3, 3, 3, 3)
      )) +
      scale_color_manual(
        values = c(
          "group01" = "#ee82ee",
          "group02" = "#2e8b57",
          "group03" = "#1e90ff",
          "group04" = "#daa520",
          "group05" = "#cdc673"
        ),
        name = "",
        labels = c(
          "group01" = "P < 5 × 10–100 (672 SNPs)",
          "group02" = "5 × 10–50 > P > 5 × 10–100 (1,110 SNPs)",
          "group03" = "5 × 10–20 > P > 5 × 10–50 (3,513 SNPs)",
          "group04" = "5 × 10–10 > P > 5 × 10–20 (5,192 SNPs)",
          "group05" = "5 × 10–8 > P > 5 × 10–10 (1,624 SNPs)"
        )
      ) +
      theme_bw() +
      theme(
        panel.grid = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(),
        legend.position = c(0.7, 0.8)
      ) +
      # Breaks are given as fractions but labelled as percentages.
      scale_x_continuous(
        breaks = c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
        labels = c(1, 5, 10, 20, 30, 40, 50)
      ) +
      scale_y_continuous(
        breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3),
        limits = c(-0.3, 0.3)
      ) +
      # Axis lines are truncated to the range of the outermost breaks.
      guides(
        x = guide_axis_truncated(trunc_lower = 0.01, trunc_upper = 0.5),
        y = guide_axis_truncated(trunc_lower = -0.3, trunc_upper = 0.3)
      ) +
      labs(
        x = "MAF (%) in cross-ancestry meta-analysis",
        y = "Joint effect sizes (s.d.) of minor alleles\nin cross-ancestry meta-analysis"
      ) +
      geom_hline(yintercept = 0, color = "gray") +
      # Two loess curves fitted to group01 only: one through the negative
      # effects and one through the positive effects.
      geom_smooth(
        data = new.fig1 %>%
          filter(group == "group01", `Join Effect of Minor Allele` < 0),
        aes(
          x = `Minor Allele Frequency`,
          y = `Join Effect of Minor Allele`
        ),
        method = "loess",
        formula = "y~x",
        se = FALSE, color = "gray",
        show.legend = FALSE
      ) +
      geom_smooth(
        data = new.fig1 %>%
          filter(group == "group01", `Join Effect of Minor Allele` > 0),
        aes(
          x = `Minor Allele Frequency`,
          y = `Join Effect of Minor Allele`
        ),
        method = "loess",
        formula = "y~x",
        se = FALSE, color = "gray",
        show.legend = FALSE
      )
    p1
    
    image.png

    关于曲线不太清楚是用什么数据做的,这里直接自动添加拟合曲线

    图例里的文本上下标 出图后再编辑吧

    关于散点图,今天还新学到一个知识点:如果散点图的点非常多(比如GWAS里常用的曼哈顿图),输出的pdf文件会非常大,后续想要编辑这个pdf文件会比较麻烦。

    关于如何解决这个问题,看到一个讨论群里有人讨论,他们提到一个办法是可以把散点栅格化(栅格化就是把矢量图形转换成由像素组成的位图,这样文件里不用再单独保存每一个点),可以借助R包ggrastr

    对应的github主页是

    https://github.com/VPetukhov/ggrastr

    正好我们今天的推文内容是数据量比较多的散点图,我们可以按照这个做法试试,这里参考微信公众号推文 https://mp.weixin.qq.com/s/ou0cjD8dLMNaDLk588KSwQ

    安装ggrastr这个R包

    # Install ggrastr only when it is missing, so re-running the script does
    # not reinstall the package every time.
    if (!requireNamespace("ggrastr", quietly = TRUE)) {
      install.packages("ggrastr")
    }
    

    如果要把点栅格化,只需要把对应的散点图函数geom_point()换成geom_point_rast()

    library(ggrastr)
    # Same figure as the non-rasterised version, except that the points are
    # drawn with geom_point_rast() so the point layer is rasterised.
    # The group01 legend label previously carried a stray "$)" suffix; it now
    # matches the label used in the non-rasterised plot.
    p2 <- ggplot(
      data = new.fig1,
      aes(
        x = `Minor Allele Frequency`,
        y = `Join Effect of Minor Allele`,
        color = group
      )
    ) +
      # Legend keys are drawn as filled rectangles rather than points.
      geom_point_rast(
        key_glyph = rectangle_key_glyph(
          color = color,
          fill = color,
          padding = margin(3, 3, 3, 3)
        ),
        size = 0.1,
        # Resolution of the rasterised layer; falls back to 300 when the
        # ggrastr.default.dpi option is not set.
        raster.dpi = getOption("ggrastr.default.dpi", 300)
      ) +
      scale_color_manual(
        values = c(
          "group01" = "#ee82ee",
          "group02" = "#2e8b57",
          "group03" = "#1e90ff",
          "group04" = "#daa520",
          "group05" = "#cdc673"
        ),
        name = "",
        labels = c(
          "group01" = "P < 5 × 10–100 (672 SNPs)",
          "group02" = "5 × 10–50 > P > 5 × 10–100 (1,110 SNPs)",
          "group03" = "5 × 10–20 > P > 5 × 10–50 (3,513 SNPs)",
          "group04" = "5 × 10–10 > P > 5 × 10–20 (5,192 SNPs)",
          "group05" = "5 × 10–8 > P > 5 × 10–10 (1,624 SNPs)"
        )
      ) +
      theme_bw() +
      theme(
        panel.grid = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(),
        legend.position = c(0.7, 0.8)
      ) +
      # Breaks are given as fractions but labelled as percentages.
      scale_x_continuous(
        breaks = c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
        labels = c(1, 5, 10, 20, 30, 40, 50)
      ) +
      scale_y_continuous(
        breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3),
        limits = c(-0.3, 0.3)
      ) +
      # Axis lines are truncated to the range of the outermost breaks.
      guides(
        x = guide_axis_truncated(trunc_lower = 0.01, trunc_upper = 0.5),
        y = guide_axis_truncated(trunc_lower = -0.3, trunc_upper = 0.3)
      ) +
      labs(
        x = "MAF (%) in cross-ancestry meta-analysis",
        y = "Joint effect sizes (s.d.) of minor alleles\nin cross-ancestry meta-analysis"
      ) +
      geom_hline(yintercept = 0, color = "gray") +
      # Two loess curves fitted to group01 only: one through the negative
      # effects and one through the positive effects.
      geom_smooth(
        data = new.fig1 %>%
          filter(group == "group01", `Join Effect of Minor Allele` < 0),
        aes(
          x = `Minor Allele Frequency`,
          y = `Join Effect of Minor Allele`
        ),
        method = "loess",
        formula = "y~x",
        se = FALSE, color = "gray",
        show.legend = FALSE
      ) +
      geom_smooth(
        data = new.fig1 %>%
          filter(group == "group01", `Join Effect of Minor Allele` > 0),
        aes(
          x = `Minor Allele Frequency`,
          y = `Join Effect of Minor Allele`
        ),
        method = "loess",
        formula = "y~x",
        se = FALSE, color = "gray",
        show.legend = FALSE
      )
    
    # Write both plots to 6 x 6 inch PDFs so their file sizes can be compared.
    # print() is called explicitly because a bare `p1` / `p2` is only drawn
    # when auto-printing is active (it is not inside source() or a function),
    # which would otherwise leave an empty PDF.
    # `p1` is expected to hold the non-rasterised plot and `p2` the
    # rasterised one.
    pdf("p1.pdf", width = 6, height = 6)
    print(p1)
    dev.off()
    
    pdf("p2.pdf", width = 6, height = 6)
    print(p2)
    dev.off()
    

    输出的p2如果放大 点是会变模糊的

    image.png

    两个文件的大小也不一样,栅格化之前是700k,栅格化之后只有200k

    image.png

    示例数据和代码可以给公众号推文点赞,点击在看,最后留言获取

    欢迎大家关注我的公众号

    小明的数据分析笔记本

    小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

    相关文章

      网友评论

        本文标题:跟着Nature学作图:R语言ggplot2散点栅格化能够减小输

        本文链接:https://www.haomeiwen.com/subject/usyqzrtx.html