美文网首页基因组数据绘图科研信息学ggplot2
『R画图脚本进阶』单个基因结构绘制

『R画图脚本进阶』单个基因结构绘制

作者: ShawnMagic | 来源:发表于2020-11-09 16:48 被阅读0次

    2020.11.10更新代码
    emmm, 稍微想了下把那个辣眼睛的代码改的稍微不辣眼睛了一点...


    果然需求是生产力驱动的第一要素.....以前没想过要整,现在又需求了,其实TBtools就可以很方便的绘制,不过还是手痒想用R实现一下。就用ggplot2撸出来了。国际惯例,上图

    Gh_A08G039000.jpg

    方法和原理

    其实就是把gff文件可视化一下,看GFF文件的介绍:陈连福大佬的博客-GFF3格式介绍
    我们发现GFF3包含了基因整体信息,所以就想**根据gff文件信息,提取UTR和cds信息用ggplot2geom_segment小片段一段一段的画出来。其实很简单。注意以下几个信息:

    • strand,方向性。
    • exon是否包含UTR。

    所以鉴于这两点也是写了两个判断。直来直去的代码有点辣眼睛,不过没时间了,先解决问题,以后在慢慢美化ಥ_ಥ。

    Script

    更新后, 之前的不删除了,看最后能优化成什么样

    drawGeneStructure = function(gff,size,lcolor){
      names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
      chr = unique(gff$chr)
      strand = unique(gff$strand)
      a = filter(gff,type == "gene")$attributes
      desc = strsplit(a,split = ";")%>%unlist(.)
      tpm1 = filter(gff,type == "exon")
      tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
      if (strand == "-"){
        tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                          end = tpm1$start,
                          start = tpm1$end,
                          y = 1)
        xend = tpm2$end[1]-100
      } else {
        tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                          start = tpm1$start,
                          end = tpm1$end,
                          y = 1)
        xend = tpm2$end[1]+100
      }
      p = ggplot(tpm2,aes(x = start,y = y))+
        geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                     color = lcolor)+
        geom_segment(data = tpm2,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                     size = size)+
        geom_segment(aes(x = tpm2$end[1],y = 0, xend = xend, yend = 0),
                     arrow = arrow(length = unit(0.2,"cm")),
                     color = "red")+
        xlab(chr)+
        theme(axis.ticks.y.left = element_blank(),
              axis.ticks.y = element_blank(),
              axis.line.y = element_blank(),
              axis.line.x.top = element_blank(),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.background = element_blank(),
              panel.background = element_blank(),
              axis.title.y = element_blank(),
              axis.line = element_line(colour = "black",size = 1),
              legend.position = "none")
      
      if(nrow(tpm3) != 0){
        p = p +
          geom_segment(data = tpm3,
                       mapping = aes(x = start,xend = end,y = 0, yend = 0),
                       size = size,
                       color = "grey")
      } else {
        p = p
      }
      return(p)
    }
    

    原始辣眼睛代码

    drawGeneStructure = function(gff,size,lcolor){
      names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
      chr = unique(gff$chr)
      strand = unique(gff$strand)
      a = filter(gff,type == "gene")$attributes
      desc = strsplit(a,split = ";")%>%unlist(.)
      tpm1 = filter(gff,type == "exon")
      tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
      if (nrow(tpm3) != 0) {
        if (strand == "-") {
          tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                            end = tpm1$start,
                            start = tpm1$end,
                            y = 1)
          tpm3 = data.frame(type = paste(tpm3$type,seq(1:nrow(tpm3)),sep = ""),
                            end = tpm3$start,
                            start = tpm3$end)
          p = ggplot(tpm2,aes(x = start,y = y))+
            geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                         color = lcolor)+
            geom_segment(data = tpm2,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                         size = size)+
            geom_segment(data = tpm3,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0),
                         size = size,
                         color = "grey")+
            geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
                         arrow = arrow(length = unit(0.2,"cm")),
                         color = "red")+
            xlab(chr)+
            theme(axis.ticks.y.left = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.line.y = element_blank(),
                  axis.line.x.top = element_blank(),
                  panel.grid = element_blank(),
                  axis.text.y = element_blank(),
                  plot.background = element_blank(),
                  panel.background = element_blank(),
                  axis.title.y = element_blank(),
                  axis.line = element_line(colour = "black",size = 1),
                  legend.position = "none")
        } else {
          tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                            start = tpm1$start,
                            end = tpm1$end,
                            y = 1)
          p = ggplot(tpm2,aes(x = start,y = y))+
            geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                         color = lcolor)+
            geom_segment(data = tpm2,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                         size = size)+
            geom_segment(data = tpm3,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0),
                         size = size,
                         color = "grey")+
            geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
                         arrow = arrow(length = unit(0.2,"cm")),
                         color = "red")+
            xlab(chr)+
            theme(axis.ticks.y.left = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.line.y = element_blank(),
                  axis.line.x.top = element_blank(),
                  panel.grid = element_blank(),
                  axis.text.y = element_blank(),
                  plot.background = element_blank(),
                  panel.background = element_blank(),
                  axis.title.y = element_blank(),
                  axis.line = element_line(colour = "black",size = 1),
                  legend.position = "none")
        }
      } else{
        if (strand == "-") {
          tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                            end = tpm1$start,
                            start = tpm1$end,
                            y = 1)
          p = ggplot(tpm2,aes(x = start,y = y))+
            geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                         color = lcolor)+
            geom_segment(data = tpm2,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                         size = size)+
            geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
                         arrow = arrow(length = unit(0.2,"cm")),
                         color = "red")+
            xlab(chr)+
            theme(axis.ticks.y.left = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.line.y = element_blank(),
                  axis.line.x.top = element_blank(),
                  panel.grid = element_blank(),
                  axis.text.y = element_blank(),
                  plot.background = element_blank(),
                  panel.background = element_blank(),
                  axis.title.y = element_blank(),
                  axis.line = element_line(colour = "black",size = 1),
                  legend.position = "none")
        } else {
          tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                            start = tpm1$start,
                            end = tpm1$end,
                            y = 1)
          p = ggplot(tpm2,aes(x = start,y = y))+
            geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                         color = lcolor)+
            geom_segment(data = tpm2,
                         mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                         size = size)+
            geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
                         arrow = arrow(length = unit(0.2,"cm")),
                         color = "red")+
            xlab(chr)+
            theme(axis.ticks.y.left = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.line.y = element_blank(),
                  axis.line.x.top = element_blank(),
                  panel.grid = element_blank(),
                  axis.text.y = element_blank(),
                  plot.background = element_blank(),
                  panel.background = element_blank(),
                  axis.title.y = element_blank(),
                  axis.line = element_line(colour = "black",size = 1),
                  legend.position = "none")
        }
      }
      
    return(p)
    }
    

    Usage

    • gff3文件提取你要画的基因的结构,如果没有gff3,按照下面的自己写一个df也行:
    # 提取
    grep "Gh_A08G039000" gh.criv2.gff3
    # 提取结果:
    A08     EVM     gene    4148620 4154669 .       -       .       ID=Gh_A08G039000;Name=RH8;description=DEAD-box ATP-dependent RNA helicase 8 ;
    A08     EVM     mRNA    4148620 4154669 .       -       .       ID=Gh_A08G039000.1;Parent=Gh_A08G039000
    A08     EVM     exon    4148620 4148965 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     three_prime_UTR 4148620 4148965 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     three_prime_UTR 4149091 4149130 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     exon    4149091 4149209 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4149131 4149209 .       -       1       Parent=Gh_A08G039000.1
    A08     EVM     exon    4149308 4149381 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4149308 4149381 .       -       0       Parent=Gh_A08G039000.1
    A08     EVM     exon    4149465 4149553 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4149465 4149553 .       -       2       Parent=Gh_A08G039000.1
    A08     EVM     exon    4150111 4150291 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4150111 4150291 .       -       0       Parent=Gh_A08G039000.1
    A08     EVM     exon    4150367 4150618 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4150367 4150618 .       -       0       Parent=Gh_A08G039000.1
    A08     EVM     exon    4151917 4152155 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4151917 4152155 .       -       2       Parent=Gh_A08G039000.1
    A08     EVM     exon    4152683 4152917 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4152683 4152917 .       -       0       Parent=Gh_A08G039000.1
    A08     EVM     exon    4153404 4153464 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4153404 4153464 .       -       1       Parent=Gh_A08G039000.1
    A08     EVM     CDS     4154015 4154283 .       -       0       Parent=Gh_A08G039000.1
    A08     EVM     exon    4154015 4154669 .       -       .       Parent=Gh_A08G039000.1
    A08     EVM     five_prime_UTR  4154284 4154669 .       -       .       Parent=Gh_A08G039000.1
    
    • 画图
      参数解释:
      • gff: 就是上面你提取出来的基因gff信息
      • size: 就是图中exon的高度,size越大高度越大,建议size = 4
      • lcolor: 中间那条黑线的颜色,这个看你爱好
    drawGeneStructure(gff = gff, lcolor = "black", size = 4)
    

    完工...
    为了保住小徽章我也是醉了....

    相关文章

      网友评论

        本文标题:『R画图脚本进阶』单个基因结构绘制

        本文链接:https://www.haomeiwen.com/subject/xqahbktx.html