美文网首页researchIMP researchR for statistics
跟着Nature学作图:R语言ggplot2柱形图添加误差线和频

跟着Nature学作图:R语言ggplot2柱形图添加误差线和频

作者: 小明的数据分析笔记本 | 来源:发表于2022-12-11 14:52 被阅读0次

    论文

    A saturated map of common genetic variants associated with human height

    https://www.nature.com/articles/s41586-022-05275-y

    s41586-022-05275-y.pdf

    代码没有公开,但是作图数据基本都公开了,争取把每个图都重复一遍

    今天的推文重复论文中的extended Figure5 频率分布直方图和柱形图添加误差线

    image.png

    其中图b的数据没有找到,我们只重复其他5个小图

    首先是两个频率分布直方图

    这两个作图代码是一样的

    library(readxl)
    
    dat01<-read_excel("data/20221014/extendFig5.xlsx",
                      sheet = "Source Data for Panels a - c")
    head(dat01)
    
    library(ggplot2)
    library(ggh4x)
    p1<-ggplot(data=dat01,aes(x=`Effect Size`))+
      geom_histogram(bins = 70,color="black",fill="grey")+
      geom_vline(xintercept = 0,lty="dashed",color="green")+
      scale_x_continuous(breaks = seq(-1.5,1.5,by=0.5))+
      scale_y_continuous(breaks = seq(0,1000,by=200))+
      theme_classic()+
      guides(x=guide_axis_truncated(trunc_lower = -1.5,
                                    trunc_upper = 1.5),
             y=guide_axis_truncated(trunc_lower = 0,
                                    trunc_upper = 1000))+
      labs(x="Estimated effect of minor haplotype",
           y="Frequency")
    p1
    
    p3<-ggplot(data=dat01,aes(x=`Variance Explained`*100))+
      geom_histogram(bins = 30,color="black",fill="grey")+
      scale_x_continuous(breaks = seq(0,0.006,by=0.001))+
      scale_y_continuous(breaks = seq(0,6000,by=2000))+
      theme_classic()+
      guides(x=guide_axis_truncated(trunc_lower = 0,
                                    trunc_upper = 0.006),
             y=guide_axis_truncated(trunc_lower = 0,
                                    trunc_upper = 6000))+
      labs(x="Variance explained by each haplotype (in %)",
           y="Frequency")
    p3
    
    

    图d柱形图误差线叠加散点图

    dat02<-read_excel("data/20221014/extendFig5.xlsx",
                      sheet = "Panel d")
    dat02 %>% colnames()
    
    library(tidyverse)
    dat02 %>% 
      mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) %>% 
      group_by(x) %>% 
      summarise(mean_value=mean(`Signal Density Detected`),
                sd_value=sd(`Signal Density Detected`)) %>% 
      ungroup() -> dat02.1
    
    dat02 %>% 
      mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) -> dat02.2
    p4<-ggplot()+
      geom_errorbar(data=dat02.1,
                    aes(x=x,
                        ymin=mean_value-0.1,
                        ymax=mean_value+sd_value),
                    width=0.3,
                    color="#e27765")+
      geom_col(data=dat02.1,
               aes(x=x,y=mean_value),
               fill="#daa421")+
      geom_point(data=dat02.2,
                 aes(x=x,y=`Signal Density Detected`),
                 color="gray",size=3)+
      theme_classic()+
      scale_y_continuous(breaks = seq(0,10,by=2),
                         expand=expansion(mult = c(0,0.1)))+
      scale_x_discrete(labels=c("0.5%\n(1.5)","1%\n(2.0)",
                                "2%\n(2.8)","5%\n(4.5)"))+
      theme(axis.line.x = element_blank(),
            axis.ticks.x = element_blank())+
      guides(y=guide_axis_truncated(trunc_lower = 0,
                                    trunc_upper = 10))+
      coord_cartesian(clip = "off")+
      labs(x="Variabce explained by causal variant\n(median allelic effect across simulation replicates - in SD)",
           y="Mean density (+S.E.) of genome-wide\nsignigicant SNPs within 100kb")
    
    p4
    

    普通柱形图添加误差线

    dat03<-read_excel("data/20221014/extendFig5.xlsx",
                      sheet = "Panel e")
    dat03 %>% colnames()
    p5<-dat03 %>% 
      mutate(Ancestries=factor(Ancestries,
                               levels = Ancestries)) %>% 
      ggplot(aes(x=Ancestries,y=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`))+
      geom_col(fill="#fe7357")+
      geom_errorbar(aes(ymin=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`-`Standard Error`,
                        ymax=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`+`Standard Error`),
                    width=0.3,
                    color="#fe7357")+
      theme_classic()+
      theme(axis.line.x = element_blank(),
            axis.ticks.x = element_blank())+
      scale_y_continuous(breaks = seq(0,0.8,by=0.2),
                         limits = c(0,0.8),
                         expand = expansion(mult=c(0,0)))+
      scale_x_discrete(labels=c("SAS\n(N=9,219)","EUR\n(N=414,429)",
                                "AFR\n(N=7,543)","EAS(N=1,496)"))
    
    p5
    

    水平柱形图添加误差线

    dat04<-read_excel("data/20221014/extendFig5.xlsx",
                      sheet = "Panel f")
    
    dat04 %>% colnames()
    p6<-dat04 %>% 
      mutate(`Statistical Model`=factor(`Statistical Model`,
                                        levels = `Statistical Model`)) %>% 
      ggplot(aes(x=`Variance explained`,y=`Statistical Model`))+
      geom_col(fill="#006403")+
      geom_errorbarh(aes(xmin=`Variance explained`-`Standard-Error`,
                         xmax=`Variance explained`+`Standard-Error`),
                     height=0.2,
                     color="#d19f84")+
      theme_classic()+
      theme(axis.line.y = element_blank(),
            axis.ticks.y = element_blank())+
      scale_y_discrete(labels=scales::label_wrap(30))+
      scale_x_continuous(limits = c(0,0.0055),
                         breaks = seq(0,0.005,by=0.001))+
      guides(x=guide_axis_truncated(trunc_lower = 0,
                                    trunc_upper = 0.005))+
      labs(y=NULL)
    p6
    

    最后是所有图组合到一起

    library(patchwork)
    
    (p1+theme(axis.title = element_text(size=10))+
                plot_spacer()+
                p3+
                theme(axis.title = element_text(size=10)))/(p4+
                             theme(axis.title = element_text(size=10))+
                             p5+
                             theme(axis.title = element_text(size=10),
                                   axis.text.x = element_text(size=10),)+
                             p6+
                             theme(axis.text.y = element_text(size=10)))+
      plot_annotation(tag_levels = "a")
    
    image.png

    示例数据和代码可以给公众号推文点赞,点击在看,最后留言获取

    欢迎大家关注我的公众号

    小明的数据分析笔记本

    小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

    相关文章

      网友评论

        本文标题:跟着Nature学作图:R语言ggplot2柱形图添加误差线和频

        本文链接:https://www.haomeiwen.com/subject/jmgtqdtx.html