美文网首页BioStatRpython模块
gganatogram 人体解剖医学包

gganatogram 人体解剖医学包

作者: 土豆学生信 | 来源:发表于2018-09-18 13:22 被阅读513次

    原文来源 https://jespermaag.github.io/blog/2018/gganatogram/
    本文为翻译版,不当之处请见谅!

    gganatogram

    https://github.com/jespermaag/gganatogram

    image.png

    希望可以为不同的生物创建解剖图像,但是目前只有人类男性可用。
    在看到ggseg的twitter帖子之后,我萌生了开发这个包的想法。类似的工具对整个生物学领域都会有帮助。由于找不到任何类似的工具,我决定创建自己的第一个R包。

    该软件包使用了ArrayExpress Expression Atlas解剖图(anatomogram)中的组织坐标。
    https://www.ebi.ac.uk/gxa/home
    https://github.com/ebi-gene-expression-group/anatomogram

    生成包

    下载所有svg

    为了创建包,我首先必须从Expression Atlas中检索所有组织的坐标。使用以下命令下载解剖图包。

    npm install --save anatomogram
    
    

    从svg中提取坐标

    我使用python来提取homo_sapiens.male.svg文件中每个组织的坐标,名称和变换(transform)。下面的代码读取svg,并将名称,坐标和变换写入文件,然后在R中处理。

    from xml.dom import minidom
    import os
    import csv

    # Pull every tissue's name, path data ('d') and transform out of the
    # anatomogram SVG and write them as tab-separated rows for later use in R.
    organism = "homo_sapiens.male"
    # Rows written by the first loop always carry the identity transform.
    identity_transform = 'matrix(1,0,0,1,0,0)'

    doc = minidom.parse(organism + ".svg")

    # The with-block closes the file even if extraction fails part-way through;
    # newline='' is what the csv module expects so it controls line endings.
    with open(organism + '_coords.tsv', 'w', newline='') as your_csv_file:
        wr = csv.writer(your_csv_file, delimiter='\t')

        # Pass 1: individual <path> elements. The three checks are independent,
        # so a single path may produce more than one row.
        for path in doc.getElementsByTagName('path'):
            path_id = path.getAttribute('id')
            path_data = path.getAttribute('d')

            # Outline paths are written under their own id.
            if "outline" in path_id or "LAYER_OUTLINE" in path_id:
                wr.writerow([path_id, path_data, identity_transform])

            # Paths whose id starts with 'UB' are named after their <title> child.
            if path_id.startswith('UB'):
                title = path.getElementsByTagName('title')[0].firstChild.nodeValue
                wr.writerow([title, path_data, identity_transform])

            # Paths inside a 'UB...' parent that has no transform are named
            # after the parent id. Only element nodes carry attributes, and a
            # missing id reads as '' instead of raising KeyError.
            parent = path.parentNode
            if parent.nodeType == parent.ELEMENT_NODE:
                parent_id = parent.getAttribute('id')
            else:
                parent_id = ''
            if parent_id.startswith('UB') and not parent.hasAttribute('transform'):
                wr.writerow([parent_id, path_data, identity_transform])

        # Pass 2: <g> groups from the sixth one onwards.
        # NOTE(review): assumes each such group has a 'transform' attribute and
        # that its second child node carries the tissue id - confirm against
        # the SVG being processed.
        for group in doc.getElementsByTagName('g')[5:]:
            for node in group.childNodes:
                # Skip non-element nodes (text, comments: they have no
                # attributes) and any node whose name contains "text".
                if node.nodeType != node.ELEMENT_NODE or "text" in node.nodeName:
                    continue
                # Trace output kept from the original script.
                print(node.nodeName)
                print(node.attributes.keys())
                if node.hasAttribute('d'):
                    wr.writerow([group.childNodes[1].attributes['id'].value,
                                 node.getAttribute('d'),
                                 group.attributes['transform'].value])
    

    处理R中的坐标,并创建一个包

    我创建了一个函数来将坐标提取到数据框中并转换数据。需要一些手动编辑才能获得正确的坐标,并删除一些不起作用的组织

    #' Convert one SVG path string into a data frame of transformed coordinates
    #'
    #' @param coords Character scalar: the path's `d` attribute, with tokens
    #'   separated by single spaces and points written as "x,y".
    #' @param name Tissue name; stored in the `id` column and used for the
    #'   tissue-specific corrections below.
    #' @param transMatrix Character scalar with the SVG transform, either
    #'   `matrix(a,b,c,d,e,f)` or `translate(tx,ty)`.
    #' @return A data frame with the raw coordinates (`X1`, `X2`), `id`, and,
    #'   when the transform is recognised, the transformed `x` and `y`.
    extractCoords <- function(coords, name, transMatrix) {
        tokens <- strsplit(coords, " ")

        # Blank out the two tokens that follow every token containing "M".
        # NOTE(review): presumably so the resulting NA values mark where a
        # sub-path starts - confirm against the plotting code.
        tokens[[1]][c(grep("M", tokens[[1]]) + 1, grep("M", tokens[[1]]) + 2)] <- NA

        # Drop every token that contains a letter (the path commands).
        tokens[[1]] <- tokens[[1]][grep("[[:alpha:]]", tokens[[1]], invert = TRUE)]

        # Split the "x,y" tokens and lay the numbers out as two columns.
        anatCoord <- as.data.frame(lapply(tokens, function(u)
            matrix(as.numeric(unlist(strsplit(u, ","))), ncol = 2, byrow = TRUE)))
        # A point is missing as soon as one of its coordinates is missing.
        anatCoord$X2[is.na(anatCoord$X1)] <- NA
        anatCoord$X1[is.na(anatCoord$X2)] <- NA
        anatCoord$id <- name

        if (any(grepl("matrix", transMatrix))) {
            transForm <- gsub('matrix\\(|\\)', '', transMatrix)
            transForm <- as.numeric(strsplit(transForm, ",")[[1]])

            # SVG matrix(a,b,c,d,e,f): x' = a*x + c*y + e, y' = b*x + d*y + f.
            # Identical to the previous formula whenever b and c are zero.
            anatCoord$x <- transForm[1] * anatCoord$X1 +
                transForm[3] * anatCoord$X2 + transForm[5]
            anatCoord$y <- transForm[2] * anatCoord$X1 +
                transForm[4] * anatCoord$X2 + transForm[6]
        } else if (any(grepl("translate", transMatrix))) {
            # grepl() always yields TRUE/FALSE; a bare grep() returns
            # integer(0) on no match, which makes `if` fail.
            transForm <- gsub('translate\\(|\\)', '', transMatrix)
            transForm <- as.numeric(strsplit(transForm, ",")[[1]])
            # In SVG a translate() without ty means ty = 0.
            if (length(transForm) < 2) {
                transForm[2] <- 0
            }
            # Hand-tuned correction for one specific leukocyte group.
            if (name == 'leukocyte' && transForm[1] == 4.5230265) {
                transForm <- c(103.63591 + 4.5230265, -47.577078 + 11.586659)
            }
            anatCoord$x <- anatCoord$X1 + transForm[1]
            anatCoord$y <- anatCoord$X2 + transForm[2]
        }

        # Discard bronchus shapes that reach beyond x = 100.
        if (name == 'bronchus') {
            if (max(anatCoord$x, na.rm = TRUE) > 100) {
                anatCoord$x <- NA
                anatCoord$y <- NA
            }
        }

        # Discard any shape with a complete point outside -5 <= x <= 150.
        kept_x <- anatCoord[complete.cases(anatCoord), ]$x
        if (any(kept_x < -5) || any(kept_x > 150)) {
            anatCoord$x <- NA
            anatCoord$y <- NA
        }

        anatCoord
    }
    

    最后,用extractCoords函数处理了python输出。

    # Read the table written by the python script: V1 = name, V2 = path data,
    # V3 = transform.
    hsMale <- read.table('homo_sapiens.male_coords.tsv', sep = '\t', stringsAsFactors = FALSE)

    # One coordinate data frame per row. seq_len() keeps the loop from running
    # on an empty table, and extractCoords() is called only once per row.
    hgMale_list <- vector("list", nrow(hsMale))
    for (i in seq_len(nrow(hsMale))) {
        hgMale_list[[i]] <- extractCoords(hsMale$V2[i], hsMale$V1[i], hsMale$V3[i])
    }
    # Name each element after its tissue, cut at the first '-' (the same names
    # the earlier paste0()/gsub() round trip produced).
    names(hgMale_list) <- gsub('-.*', '', hsMale$V1)
    

    然后将结果列表用作gganatogram包的基础。可以使用以下说明从github安装该软件包。

    安装

    使用devtools从github安装。

    ## install from Github
    devtools::install_github("jespermaag/gganatogram")
    

    用法

    这个包需要ggplot2和ggpolypath。

    library(ggplot2)
    library(ggpolypath)
    library(gganatogram)
    library(dplyr)
    

    要使用函数gganatogram,您需要拥有一个包含器官组织,颜色和数值的数据框。

    # Example input for gganatogram(): one row per organ with a grouping label,
    # a fill colour and a numeric value.
    organPlot <- data.frame(
        organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"),
        type = c("circulation", "circulation", "nervous system", "nervous system",
                 "digestion", "digestion", "digestion"),
        colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"),
        value = c(10, 5, 1, 8, 2, 5, 5),
        stringsAsFactors = FALSE
    )
    
    head(organPlot)
    
    ##       organ           type colour value
    ## 1     heart    circulation    red    10
    ## 2 leukocyte    circulation    red     5
    ## 3     nerve nervous system purple     1
    ## 4     brain nervous system purple     8
    ## 5     liver      digestion orange     2
    ## 6   stomach      digestion orange     5
    

    使用函数gganatogram,根据颜色填充器官。

    # Fill each listed organ with the colour named in its `colour` column.
    gganatogram(
        data = organPlot,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "colour"
    )
    
    image.png

    我们可以使用ggplot主题和函数来调整图

    # Same plot, with axes and background removed by theme_void().
    gganatogram(
        data = organPlot,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "colour"
    ) +
        theme_void()
    
    image.png

    我们还可以使用hgMale_key绘制所有可用组织,它是包中可以直接使用的对象

    hgMale_key$organ
    
    ##  [1] "bone marrow"               "frontal cortex"           
    ##  [3] "prefrontal cortex"         "gastroesophageal junction"
    ##  [5] "caecum"                    "ileum"                    
    ##  [7] "rectum"                    "nose"                     
    ##  [9] "tongue"                    "penis"                    
    ## [11] "nasal pharynx"             "spinal cord"              
    ## [13] "throat"                    "diaphragm"                
    ## [15] "liver"                     "stomach"                  
    ## [17] "spleen"                    "duodenum"                 
    ## [19] "gall bladder"              "pancreas"                 
    ## [21] "colon"                     "small intestine"          
    ## [23] "appendix"                  "urinary bladder"          
    ## [25] "bone"                      "cartilage"                
    ## [27] "esophagus"                 "skin"                     
    ## [29] "brain"                     "heart"                    
    ## [31] "lymph_node"                "skeletal_muscle"          
    ## [33] "leukocyte"                 "temporal_lobe"            
    ## [35] "atrial_appendage"          "coronary_artery"          
    ## [37] "hippocampus"               "vas_deferens"             
    ## [39] "seminal_vesicle"           "epididymis"               
    ## [41] "tonsil"                    "lung"                     
    ## [43] "trachea"                   "bronchus"                 
    ## [45] "nerve"                     "kidney"
    
    # Draw every tissue listed in hgMale_key.
    gganatogram(
        data = hgMale_key,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "colour"
    ) +
        theme_void()
    
    image.png

    如果不想绘制轮廓,请使用outline = F。

    # Keep two organ groups and plot them without the body outline.
    # FALSE is spelled out because `F` is an ordinary, reassignable variable.
    organPlot %>%
        dplyr::filter(type %in% c("circulation", "nervous system")) %>%
        gganatogram(
            outline = FALSE,
            fillOutline = "#a6bddb",
            organism = "human",
            sex = "male",
            fill = "colour"
        ) +
        theme_void()
    
    image.png

    我们可以根据给予每个器官的值来填充组织

    # Colour the organs by their numeric `value` on a white-to-red gradient.
    gganatogram(
        data = organPlot,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "value"
    ) +
        theme_void() +
        scale_fill_gradient(low = "white", high = "red")
    

    我们也可以使用facet_wrap来比较组。
    首先创建两个数据框,它们的数值不同,并在type列中标明各自的条件。

    # Two conditions ("Normal" first, then "Cancer") for the same seven organs;
    # only the values differ between them. Built in one data.frame() call so
    # the organ and colour vectors are written once.
    compareGroups <- data.frame(
        organ = rep(c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"),
                    times = 2),
        colour = rep(c("red", "red", "purple", "purple", "orange", "orange", "orange"),
                     times = 2),
        value = c(10, 5, 1, 8, 2, 5, 5,   # Normal
                  5, 5, 10, 8, 2, 5, 5),  # Cancer
        type = rep(c("Normal", "Cancer"), each = 7),
        stringsAsFactors = FALSE
    )
    
    # One panel per condition, organs coloured by value.
    gganatogram(
        data = compareGroups,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "value"
    ) +
        theme_void() +
        facet_wrap(~type) +
        scale_fill_gradient(low = "white", high = "red")
    
    image.png
    # All tissues from hgMale_key, one panel per `type`.
    gganatogram(
        data = hgMale_key,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "colour"
    ) +
        theme_void() +
        facet_wrap(~type)
    
    image.png
    # Same panels without the body outline, each with its own axis ranges.
    # The facet_wrap() argument is `scales`; `scale` only worked through
    # partial argument matching.
    gganatogram(
        data = hgMale_key,
        outline = FALSE,
        fillOutline = "#a6bddb",
        organism = "human",
        sex = "male",
        fill = "colour"
    ) +
        theme_void() +
        facet_wrap(~type, scales = "free")
    
    image.png
    # One panel per organ: `type` is overwritten with the organ name so that
    # facet_wrap(~type) splits by organ.
    organtype <- organPlot
    organtype %>%
        mutate(type = organ) %>%
        gganatogram(
            outline = FALSE,
            fillOutline = "#a6bddb",
            organism = "human",
            sex = "male",
            fill = "colour"
        ) +
        theme_void() +
        # `scales` spelled out instead of relying on partial matching of `scale`.
        facet_wrap(~type, scales = "free")
    
    image.png

    相关文章

      网友评论

        本文标题:gganatogram 人体解剖医学包

        本文链接:https://www.haomeiwen.com/subject/vreunftx.html