美文网首页R语言R plotIMP research
跟着Nature学作图:R语言ggplot2频率分布直方图/堆积

跟着Nature学作图:R语言ggplot2频率分布直方图/堆积

作者: 小明的数据分析笔记本 | 来源:发表于2022-07-31 16:33 被阅读0次

    论文

    Graph pangenome captures missing heritability and empowers tomato breeding

    https://www.nature.com/articles/s41586-022-04808-9#MOESM8

    s41586-022-04808-9.pdf

    没有找到论文里的作图代码,但是找到了部分作图数据,我们可以用论文中提供的原始数据模仿出论文中的图

    今天的推文重复一下论文中的 Figure 3a、Figure 3b、Figure 3c:频率分布直方图、堆积柱形图、散点图

    image.png

    频率分布直方图代码

    # Figure 3a: histogram of `h2`, with bars filled by `type`.
    library(readxl)
    library(ggplot2)
    library(latex2exp)

    # `skip = 1` skips the first row of the "Fig3a" sheet before the header is
    # read; the plot below needs the columns `h2` and `type`.
    fig3a <- read_excel(
      "data/20220711/41586_2022_4808_MOESM7_ESM.xlsx",
      sheet = "Fig3a",
      skip = 1
    )
    head(fig3a)
    dim(fig3a)

    # Positions of the dashed reference lines. The same numbers are printed next
    # to the lines, so they are defined once and shared by both layers.
    x <- c(0.27, 0.37, 0.62)

    p1 <- ggplot(data = fig3a, aes(x = h2)) +
      geom_histogram(aes(fill = type), bins = 100) +
      # Colours and legend labels are matched by position to the levels of
      # `type`. NOTE(review): the label order assumes the levels sort as
      # all / leading / local -- confirm against the data.
      scale_fill_manual(
        values = c("#d1edcd", "#94c2db", "#fdbeb8"),
        labels = c(
          TeX(r"(\textit{h}${^2}$ (all variants)        )"),
          TeX(r"(\textit{h}${^2}$ (leading variants))"),
          TeX(r"(\textit{h}${^2}$ (local variants)    )")
        ),
        name = ""
      ) +
      theme_bw() +
      theme(
        panel.grid = element_blank(),
        legend.position = c(0.8, 0.8)
      ) +
      # No padding on either axis, so the bars touch the panel border.
      scale_x_continuous(
        expand = expansion(mult = c(0, 0)),
        breaks = seq(0, 1, 0.2)
      ) +
      scale_y_continuous(expand = expansion(mult = c(0, 0))) +
      labs(
        y = "Counts",
        x = TeX(r"(\textit{h}${^2}$)")
      ) +
      # One dashed line per entry of `x`; each line reuses one of the fill
      # colours above (colours are matched to `x` by position).
      geom_vline(
        xintercept = x,
        lty = "dashed",
        color = c("#94c2db", "#fdbeb8", "#d1edcd")
      ) +
      # annotate() is vectorised, so a single layer prints all three values.
      annotate(geom = "text", x = x, y = 80, label = x, hjust = 0)
    p1
    
    
    image.png

    堆积柱形图

    library(tidyverse)

    fig3b <- read_excel("data/20220711/41586_2022_4808_MOESM7_ESM.xlsx",
                        sheet = "Fig3b")
    head(fig3b)
    dim(fig3b)

    # Fix the order of the three categories (this is also the legend order).
    fig3b$var2 <- factor(fig3b$var2,
                         levels = c("MLM", "LASSO", "Overlapping"))

    # Midpoints of the segments of one stacked bar.
    #
    # x: numeric vector of segment heights.
    #
    # Returns a numeric vector of the same length as `x`. Element k is the
    # midpoint of the k-th element of rev(x) when rev(x) is stacked upwards
    # from zero, i.e. the result is in REVERSED input order. Empty input gives
    # an empty result.
    #
    # Defined before its first use so the script runs top to bottom.
    stack.bar.label.position <- function(x) {
      x <- rev(x)
      # Running total up to and including a segment, minus half of that
      # segment, is the segment's midpoint.
      cumsum(x) - x / 2
    }

    # One text label per bar segment.
    # - arrange(): puts the rows of each bar in `var2` level order. ggplot2
    #   stacks the first level on top, so the reversed rows are the
    #   bottom-to-top order that the helper assumes.
    # - y_label = rev(value): the helper returns midpoints in reversed row
    #   order, so the labels are reversed too and each number sits in its
    #   own segment.
    # NOTE: returning several rows per group from summarise() is deprecated
    # from dplyr 1.1.0 (reframe() is the replacement); summarise() is kept so
    # the script also runs on older dplyr versions.
    df.label <- fig3b %>%
      arrange(var1, var2) %>%
      group_by(var1) %>%
      summarise(y = stack.bar.label.position(value),
                y_label = rev(value)) %>%
      ungroup()
    
    
    # Figure 3b: stacked bars of `value` per `var1`, filled by `var2`, with a
    # text label drawn at (var1, y) for every row of `df.label`.
    p2 <- ggplot(data = fig3b, aes(x = var1, y = value)) +
      # geom_col() draws the values as given (same as geom_bar(stat = "identity")).
      geom_col(aes(fill = var2), position = "stack") +
      # Colours and legend labels are matched by position to the levels of
      # `var2`; the counts in the labels are hard-coded text.
      scale_fill_manual(
        values = c("#5ba555", "#2baae1", "#c6dcf0"),
        name = "",
        labels = c(
          "MLM unique (11)",
          "LASSO unique (1,249)",
          "Overlapping (538)"
        )
      ) +
      theme_classic() +
      theme(legend.position = c(0.8, 0.8)) +
      # `df.label` must provide the columns `var1`, `y` and `y_label`.
      geom_text(
        data = df.label,
        aes(x = var1, y = y, label = y_label)
      )
    p2
    
    
    image.png

    最后的散点图

    # Figure 3c data: the first row of the "Fig3c" sheet is skipped.
    fig3c <- read_excel(
      "data/20220711/41586_2022_4808_MOESM7_ESM.xlsx",
      sheet = "Fig3c",
      skip = 1
    )
    head(fig3c)
    dim(fig3c)

    # Upper panel: rows with Type == "MLM", plotted as -log10(pvalue) against
    # `pos`. Shape, colour and size of the points all follow `Variant`.
    p3.1 <- ggplot(
      data = filter(fig3c, Type == "MLM"),
      aes(x = pos, y = -log10(pvalue))
    ) +
      geom_point(aes(color = Variant, shape = Variant, size = Variant)) +
      geom_hline(yintercept = 6, lty = "dashed") +
      scale_color_manual(values = c("#868686", "#b8275a")) +
      # Axis labels show `pos` divided by one million, with two decimals.
      scale_x_continuous(labels = function(bp) sprintf("%0.2f", bp / 1e6)) +
      scale_y_continuous(
        limits = c(0, 10),
        breaks = c(0, 5, 10)
      ) +
      labs(
        title = "MLM",
        x = "Chr3 (Mb)",
        y = TeX(r"(-log${_1}{_0}$$\left[$\textit{P}$\right]$)")
      ) +
      theme_classic() +
      theme(legend.position = "none")
    
    
    # Lower panel: rows with Type == "LASSO". Points have a fixed size of 3 and
    # each one is labelled with its `ID`.
    p3.2 <- ggplot(
      data = filter(fig3c, Type == "LASSO"),
      aes(x = pos, y = -log10(pvalue))
    ) +
      geom_point(aes(color = Variant, shape = Variant), size = 3) +
      geom_hline(yintercept = 6, lty = "dashed") +
      # hjust > 1 moves the text to the left of its point.
      geom_text(aes(label = ID), hjust = 1.2) +
      scale_color_manual(values = c("#b8275a")) +
      scale_shape_manual(values = 17) +
      # x axis limited to 42.90-43 (in units of one million of `pos`); labels
      # show `pos` divided by one million, with two decimals.
      scale_x_continuous(
        labels = function(bp) sprintf("%0.2f", bp / 1e6),
        limits = c(42.90, 43) * 1e6
      ) +
      scale_y_continuous(breaks = c(0, 5, 10)) +
      labs(
        title = "LASSO",
        x = "Chr3 (Mb)",
        y = TeX(r"(-log${_1}{_0}$$\left[$\textit{P}$\right]$)")
      ) +
      theme_classic() +
      theme(legend.position = "none")

    # patchwork's `/` places the MLM panel above the LASSO panel.
    library(patchwork)
    p3.1 / p3.2
    
    
    image.png

    最终的拼图

    # Final figure: histogram, stacked bars, and the two stacked scatter panels.
    p1 + p2 + (p3.1 / p3.2)
    
    image.png

    示例数据和代码可以自己到论文中获取,或者给本篇推文点赞,点击在看,然后留言获取

    欢迎大家关注我的公众号

    小明的数据分析笔记本

    小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

    相关文章

      网友评论

        本文标题:跟着Nature学作图:R语言ggplot2频率分布直方图/堆积

        本文链接:https://www.haomeiwen.com/subject/pwfkwrtx.html