美文网首页
Seurat基因集打分函数AddModuleScore

Seurat基因集打分函数AddModuleScore

作者: 小洁忘了怎么分身 | 来源:发表于2023-09-07 17:38 被阅读0次

    与之前学过的AUCell一样,Seurat里面自带的函数AddModuleScore也可以给细胞打分,我用之前拿的数据,把AUCell换成AddModuleScore玩一下。

    1.提取指定GOterm的基因

    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7606976/

    在这篇文章里提到,Reactive oxygen species (ROS) 活性氧相关的17个GO条目,可以把这些基因提取出来。使用msigdbr包搞定。Data_Sheet_2.XLSX是文章的附件

    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7606976/bin/Data_Sheet_2.XLSX

    rm(list = ls())
    library(tidyverse)
    dat = rio::import("Data_Sheet_2.XLSX")
    g = dat[-1,1]
    g
    
    ##  [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"                       
    ##  [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"                          
    ##  [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
    ##  [4] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
    ##  [5] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"   
    ##  [6] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"   
    ##  [7] "GO_MITOCHONDRIAL_ELECTRON_TRANSPORT_CYTOCHROME_C_TO_OXYGEN"            
    ##  [8] "GO_SUPEROXIDE_GENERATING_NADPH_OXIDASE_ACTIVITY"                       
    ##  [9] "GO_HYDROGEN_PEROXIDE_METABOLIC_PROCESS"                                
    ## [10] "GO_HYDROGEN_PEROXIDE_CATABOLIC_PROCESS"                                
    ## [11] "GO_PENTOSE_METABOLIC_PROCESS"                                          
    ## [12] "GO_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"                                
    ## [13] "GO_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"                  
    ## [14] "GO_CELLULAR_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"                       
    ## [15] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"         
    ## [16] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"            
    ## [17] "GO_NEGATIVE_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
    

    17条term👆

    2.从msigdbr查找这些term

    library(msigdbr)
    GO_df = msigdbr(species = "Homo sapiens",category = "C5") %>% 
      dplyr::select(gene_symbol,gs_name,gs_subcat)
    
    dim(GO_df)
    
    ## [1] 1424471       3
    
    table(GO_df$gs_subcat)
    
    ## 
    ##  GO:BP  GO:CC  GO:MF    HPO 
    ## 721379 115769 122674 464649
    
    GO_df = GO_df[GO_df$gs_subcat!="HPO",]
    
    table(g %in% GO_df$gs_name)
    
    ## 
    ## FALSE 
    ##    17
    

    全部匹配失败啦?

    其实是因为前缀不同

    head(GO_df$gs_name,3)
    
    ## [1] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
    ## [2] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
    ## [3] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
    
    head(g,3)
    
    ## [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"                       
    ## [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"                          
    ## [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
    

    把前缀去掉来匹配,顺便把大写变成小写,变得易读一点。

    sn = GO_df$gs_name %>%
      str_to_lower()%>%
      str_replace("_","///") %>%
      str_split("///",simplify = T)%>%
      .[,2] %>%
      str_replace_all("_"," ")
    head(sn)
    
    ## [1] "10 formyltetrahydrofolate metabolic process"
    ## [2] "10 formyltetrahydrofolate metabolic process"
    ## [3] "10 formyltetrahydrofolate metabolic process"
    ## [4] "10 formyltetrahydrofolate metabolic process"
    ## [5] "10 formyltetrahydrofolate metabolic process"
    ## [6] "10 formyltetrahydrofolate metabolic process"
    
    GO_df = mutate(GO_df,short_name = sn)
    
    g = str_to_lower(g)%>%
      str_replace("_","///") %>%
      str_split("///",simplify = T)%>%
      .[,2] %>%
      str_replace_all("_"," ")
    head(g)
    
    ## [1] "reactive oxygen species biosynthetic process"                       
    ## [2] "reactive oxygen species metabolic process"                          
    ## [3] "positive regulation of reactive oxygen species biosynthetic process"
    ## [4] "negative regulation of reactive oxygen species biosynthetic process"
    ## [5] "negative regulation of reactive oxygen species metabolic process"   
    ## [6] "positive regulation of reactive oxygen species metabolic process"
    
    table(g %in% GO_df$short_name)
    
    ## 
    ## FALSE  TRUE 
    ##     1    16
    

    经过数据框搜索,发现匹配不上的那个其实是因为少了俩空格。。。

    所以直接改一下就好。

    “superoxide generating nadph oxidase activity”

    “superoxide generating nad p h oxidase activity”

    g[!(g %in% GO_df$short_name)] = "superoxide generating nad p h oxidase activity"
    table(g %in% GO_df$short_name)
    
    ## 
    ## TRUE 
    ##   17
    
    rosgene = GO_df %>%
      filter(short_name %in% g) %>%
      pull(gene_symbol) %>%
      unique()
    length(rosgene)
    
    ## [1] 402
    

    2.打分

    与之前做过的Aucell不同,这个是seurat自带的函数,使用起来更加简单

    sce.all是seurat+singleR得出的对象,markers是findallmarkers得到的数据框。

    load("sce.all.Rdata")
    load("makers.Rdata")
    k = markers$p_val_adj < 0.05 & 
      abs(markers$avg_log2FC) > 1.5 & 
      markers$pct.1 > 0.5 & 
      markers$pct.2 < 0.5
    table(k)
    
    ## k
    ## FALSE  TRUE 
    ## 13100  1483
    
    gi = intersect(markers[k,"gene"],rosgene) 
    #32个
    library(Seurat)
    DotPlot(sce.all,features = gi,cols = "RdYlBu",
            group.by = "RNA_snn_res.0.5")+RotatedAxis()
    
    sce.all =  AddModuleScore(object = sce.all,list(gi))
    colnames(sce.all@meta.data)
    
    ##  [1] "orig.ident"      "nCount_RNA"      "nFeature_RNA"    "patient"        
    ##  [5] "time"            "sample_ID"       "clst"            "TSNE_x"         
    ##  [9] "TSNE_y"          "percent.mt"      "percent.hb"      "RNA_snn_res.0.5"
    ## [13] "seurat_clusters" "Cluster1"
    

    这个分数是直接加到meta.data里面的咯。列名也有默认,叫Cluster1.

    3.FeaturePlot可视化

    标准版:

    library(Seurat)
    FeaturePlot(sce.all,features = "Cluster1") 
    

    定制版:

    dat<- data.frame(sce.all@meta.data, 
                    sce.all@reductions$umap@cell.embeddings,
                    sce.all@active.ident)
    colnames(dat)[ncol(dat)] = "seurat_annotation"
    class_avg <- dat %>%
      group_by(seurat_annotation) %>%
      summarise(
        UMAP_1 = median(UMAP_1),
        UMAP_2 = median(UMAP_2)
      )
    
    library(ggpubr)
    ggplot(dat, aes(UMAP_1, UMAP_2))  +
      geom_point(aes(colour  = Cluster1)) + viridis::scale_color_viridis(option="A") +
      ggrepel::geom_label_repel(aes(label = seurat_annotation),
                                data = class_avg,
                                label.size = 0,
                                segment.color = NA)+
      theme(legend.position = "none") + theme_bw()
    
    dat$score_group = ifelse(dat$Cluster1>median(dat$Cluster1),"active","non_active")
    

    4.活跃和不活跃组的细胞比例条形图

    ggplot(dat,aes(score_group,fill = seurat_annotation))+
      geom_bar(position = "fill", alpha = 0.9)+
      scale_fill_brewer(palette = "Set1")+
      theme_classic()
    

    相关文章

      网友评论

          本文标题:Seurat基因集打分函数AddModuleScore

          本文链接:https://www.haomeiwen.com/subject/azcrvdtx.html