用小饼图代替点图里的点

作者: 小洁忘了怎么分身 | 来源:发表于2023-01-09 15:02 被阅读0次

    0.起因

    最近在群里看到一个图,棒棒糖图的点用饼图代替了。

    然后又看到一个文章里,用饼图代替了富集分析的点图,妙啊~

    这个地球上有什么漂亮的图能躲过我的小胖手呢?画他!

    1.搜索

    试了几个不同的关键词,最后这样搜到了。

    答案指向一个包,名叫scatterpie。一看作者,啊,这不是Y叔嘛~

    2. 跑示例代码

    2.1 示例数据

    set.seed(123)
    long <- rnorm(50, sd=100)
    lat <- rnorm(50, sd=50)
    d <- data.frame(long=long, lat=lat)
    d <- with(d, d[abs(long) < 150 & abs(lat) < 70,])
    n <- nrow(d)
    d$region <- factor(1:n)
    d$A <- abs(rnorm(n, sd=1))
    d$B <- abs(rnorm(n, sd=2))
    d$C <- abs(rnorm(n, sd=3))
    d$D <- abs(rnorm(n, sd=4))
    d[1, 4:7] <- d[1, 4:7] * 3
    head(d)
    
    ##          long        lat region          A        B        C        D
    ## 1  -56.047565  12.665926      1 2.13121969 8.663359 3.928711 8.676792
    ## 2  -23.017749  -1.427338      2 0.25688371 1.403569 1.375096 4.945092
    ## 4    7.050839  68.430114      3 0.24669188 0.524395 3.189978 5.138863
    ## 5   12.928774 -11.288549      4 0.34754260 3.144288 3.789556 2.295894
    ## 8 -126.506123  29.230687      5 0.95161857 3.029335 1.048951 2.471943
    ## 9  -68.685285   6.192712      6 0.04502772 3.203072 2.596539 4.439393
    

    2.2 示例图片

    library(ggplot2)
    library(scatterpie)
    ggplot() + 
      geom_scatterpie(aes(x=long, 
                          y=lat), 
                      data=d,
                      cols=LETTERS[1:4]) + 
      coord_equal()
    

    3.仿造示例数据并画图

    3.1找个数据做差异分析

    library(tinyarray)
    library(dplyr)
    library(org.Hs.eg.db)
    gse = "GSE42872"
    geo = geo_download(gse)
    Group = rep(c("control","treat"),each = 3)
    Group = factor(Group)
    find_anno(geo$gpl)
    
    ## [1] "`library(hugene10sttranscriptcluster.db);ids <- toTable(hugene10sttranscriptclusterSYMBOL)` and `ids <- AnnoProbe::idmap('GPL6244')` are both avaliable"
    
    ids <- AnnoProbe::idmap(geo$gpl,destdir = tempdir())
    deg = get_deg(geo$exp,Group,ids,logFC_cutoff = 2)
    head(deg)
    
    ##       logFC   AveExpr         t      P.Value    adj.P.Val        B probe_id
    ## 1  5.780170  7.370282  82.94833 3.495205e-12 1.163798e-07 16.32898  8133876
    ## 2 -4.212683  9.106625 -68.40113 1.437468e-11 2.393169e-07 15.71739  7965335
    ## 3  5.633027  8.763220  57.61985 5.053466e-11 4.431880e-07 15.04752  7972259
    ## 4 -3.801663  9.726468 -57.21112 5.324059e-11 4.431880e-07 15.01709  7972217
    ## 5  3.263063 10.171635  50.51733 1.324638e-10 8.821294e-07 14.45166  8129573
    ## 6 -3.843247  9.667077 -45.87910 2.681063e-10 1.487856e-06 13.97123  8015806
    ##   symbol change ENTREZID
    ## 1   CD36     up      948
    ## 2  DUSP6   down     1848
    ## 3    DCT     up     1638
    ## 4  SPRY2   down    10253
    ## 5  MOXD1     up    26002
    ## 6   ETV4   down     2118
    
    table(deg$change)
    
    ## 
    ##   down stable     up 
    ##     48  18092     82
    

    3.2 用差异基因做富集分析

    gene_up = deg$symbol[deg$change=="up"]
    gene_down = deg$symbol[deg$change=="down"]
    gene_diff = c(gene_up,gene_down)
    length(gene_diff)
    
    ## [1] 130
    
    head(gene_diff)
    
    ## [1] "CD36"       "DCT"        "MOXD1"      "NUPR1"      "ST6GALNAC2"
    ## [6] "TXNIP"
    
    g = quick_enrich(gene_diff,kkgo_file = "kk_gop.Rdata")
    
    dat = g$go@result %>% 
      arrange(pvalue) %>% 
      filter(ONTOLOGY=="BP") %>% 
      head(10) %>% 
      mutate(Description = factor(Description,levels = Description))
    
    p1 = ggplot(dat)+
      geom_point(aes(x = -log10(pvalue),y = Description))+
      theme_bw()
    p1
    

    3.3 计算每条term的上下调基因数量

    library(stringr)
    p = str_split(dat$geneID,"/")
    names(p) = dat$ID
    get_num = function(x){
      c(sum(x %in% gene_up),sum(x %in% gene_down))
    }
    re = sapply(p, get_num) %>% t() %>% as.data.frame()
    rownames(re) = names(p)
    colnames(re) = c("up","down")
    

    3.4 仿制示例数据并画图

    试了一下,如果纵坐标是分类变量是会报错的。。。

    所以曲线救国,先生成一组连续型数据,然后改刻度值。

    而需要保持饼图的形状是圆的,又需要固定坐标系,横纵坐标刻度是一样的。

    这时候难度就来了,需要生成的纵坐标得是和横坐标范围一致的等差数列,用seq函数。

    dp = cbind(lp = -log10(dat$pvalue),
               Description = dat$Description,
               re)
    dp$lab = seq(min(dp$lp), max(dp$lp), length.out = nrow(dp))
    head(dp)
    
    ##                  lp                                            Description up
    ## GO:0071900 7.264160 regulation of protein serine/threonine kinase activity  5
    ## GO:0050673 6.929666                          epithelial cell proliferation  8
    ## GO:0043405 6.351294                      regulation of MAP kinase activity  5
    ## GO:0009314 5.748852                                  response to radiation  6
    ## GO:0010165 5.691160                                      response to X-ray  0
    ## GO:0071214 5.635287                  cellular response to abiotic stimulus  6
    ##            down      lab
    ## GO:0071900   10 5.525307
    ## GO:0050673    8 5.718513
    ## GO:0043405    5 5.911719
    ## GO:0009314    8 6.104925
    ## GO:0010165    5 6.298131
    ## GO:0071214    6 6.491337
    
    p2 = ggplot() + 
      geom_scatterpie(aes(x=lp, 
                          y=lab), 
                      data=dp,
                      cols=colnames(re),
                      pie_scale = 1.5) +
      theme_bw()+
      scale_fill_manual(values = c("#f87669", "#2874C5"))+
      scale_y_continuous(labels = dp$Description,breaks = dp$lab)+
      labs(x = "-log10(pvalue)",y = "Description")+
      coord_equal()
    p2
    

    3.5 要棒棒糖还不简单嘛

    p2 + geom_segment(data = dp, aes(x=as.integer(min(dp$lp)), xend=lp, y=lab, yend=lab))
    

    相关文章

      网友评论

        本文标题:用小饼图代替点图里的点

        本文链接:https://www.haomeiwen.com/subject/dmipcdtx.html