美文网首页scRNAseq
单细胞之富集分析-6:PROGENy

单细胞之富集分析-6:PROGENy

作者: Hayley笔记 | 来源:发表于2022-08-16 09:46 被阅读0次

    单细胞富集分析系列:


    0. 简介

    异常细胞信号会引起癌症等其他疾病,并且是常见的治疗的靶点。常可以通过基因的表达来推断某个信号通路的活性。然而,只考虑基因表达对通路的作用往往忽略了翻译后修饰的作用,并且下游信号代表非常特定的实验条件。在这里,作者提出介绍PROGENy,这是一种通过利用大量公开可用的扰动实验,来克服了这两个局限性的方法。与现有方法不同,PROGENy可以(i)恢复已知驱动基因突变的作用,(ii)提供或改善药物的marker,以及(iii)区分致癌和肿瘤抑制途径,以确保患者生存。

    PROGENy可以从基因表达数据中推断14种信号通路(雄激素,雌激素,EGFR,低氧,JAK-STAT,MAPK,NFkB,PI3K,p53,TGFb,TNFa,Trail,VEGF和WNT)的通路活性。默认情况下,途径活动推断是基于相应的途径扰动后前100个响应性最高的基因的基因集,我们将其称为途径的足迹基因。为每个足迹基因分配一个权重,该权重表示对路径扰动进行调节的强度和方向。途径得分是通过表达和足迹基因权重乘积的加权总和计算得出的。
    例如:在pbmc单细胞数据中识别通路活性。

    1. 分析Bulk数据

    1.1 导入演示数据(芯片数据)
    library(airway)
    library(DESeq2)
    data(airway)
    
    # import data to DESeq2 and variance stabilize
    dset = DESeqDataSetFromMatrix(assay(airway),
                                  colData=as.data.frame(colData(airway)), design=~dex)
    dset = estimateSizeFactors(dset)
    dset = estimateDispersions(dset)
    gene_expr = getVarianceStabilizedData(dset)
    
    # annotate matrix with HGNC symbols
    library(biomaRt)
    mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
    genes = getBM(attributes = c("ensembl_gene_id","hgnc_symbol"),
                  values=rownames(gene_expr), mart=mart)
    matched = match(rownames(gene_expr), genes$ensembl_gene_id)
    rownames(gene_expr) = genes$hgnc_symbol[matched]
    dim(gene_expr)
    # [1] 64102     8
    gene_expr[1:5,1:5]
    #          SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
    # TSPAN6     9.531699   9.170317   9.674546   9.421197  10.027595
    # TNMD       5.302948   5.302948   5.302948   5.302948   5.302948
    # DPM1       9.055928   9.347026   9.235703   9.278863   9.166625
    # SCYL3      8.358430   8.273162   8.212763   8.316611   8.136760
    # C1orf112   6.967075   7.001886   6.596359   6.882393   7.060850
    
    1.2 分析和绘图
    library(progeny)
    pathways = progeny(gene_expr, scale=FALSE)
    controls = airway$dex == "untrt"
    ctl_mean = apply(pathways[controls,], 2, mean)
    ctl_sd = apply(pathways[controls,], 2, sd)
    pathways = t(apply(pathways, 1, function(x) x - ctl_mean))
    pathways = apply(pathways, 1, function(x) x / ctl_sd)
    
    library(pheatmap)
    myColor = colorRampPalette(c("Darkblue", "white","red"))(100)
    pheatmap(t(pathways),fontsize=14, show_rownames = F,
             color=myColor, main = "PROGENy", angle_col = 45, treeheight_col = 0,  
             border_color = NA)
    
    还可以加上分组标签,看起来会更直观

    2. 分析单细胞数据

    2.1 导入数据
    library(Seurat)
    library(progeny)
    library(tidyr)
    library(tibble)
    pbmc <- readRDS("pbmc.rds") #注释好的单细胞数据集
    
    2.2 提取细胞类型标签

    查看每个细胞类型这14个通路的活性

    ## We create a data frame with the specification of the cells that belong to 
    ## each cluster to match with the Progeny scores.
    CellsClusters <- data.frame(Cell = names(Idents(pbmc)), 
                                CellType = as.character(Idents(pbmc)),
                                stringsAsFactors = FALSE)
    DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
    
    2.3 计算
    ## We compute the Progeny activity scores and add them to our Seurat object
    ## as a new assay called Progeny. 
    pbmc <- progeny(pbmc, scale=FALSE, organism="Human", top=500, perm=1, 
                    return_assay = TRUE)
    pbmc@assays$progeny
    # Assay data with 14 features for 2638 cells
    # First 10 features:
    # Androgen, EGFR, Estrogen, Hypoxia, JAK-STAT, MAPK, NFkB, p53, PI3K, TGFb 
    
    pbmc对象下面的assay槽下面多了progeny

    pbmc@assays[["progeny"]]@data下储存的是14个通路在2638个细胞中的评分

    pbmc@assays[["progeny"]]@data[1:6,1:4]
    #          AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1
    # Androgen         86.88406         57.92163        94.287118        11.135108
    # EGFR             28.34279        -15.00708        -2.652041         1.738702
    # Estrogen        -51.13204        -67.55945       -64.452782       -37.103230
    # Hypoxia          96.56731        122.76491       135.750523        96.936620
    # JAK-STAT        163.11016        269.55272       315.408767       609.408980
    # MAPK            -10.39436        -59.37868       -17.161151       -61.763826
    
    2.4 整合同一细胞类型的评分
    ## We can now directly apply Seurat functions in our Progeny scores. 
    ## For instance, we scale the pathway activity scores. 
    pbmc <- Seurat::ScaleData(pbmc, assay = "progeny") 
    
    ## We transform Progeny scores into a data frame to better handling the results
    progeny_scores_df <- 
      as.data.frame(t(GetAssayData(pbmc, slot = "scale.data", 
                                   assay = "progeny"))) %>%
      rownames_to_column("Cell") %>%
      gather(Pathway, Activity, -Cell) 
    dim(progeny_scores_df)
    # [1] 36932     3
    
    ## We match Progeny scores with the cell clusters.
    progeny_scores_df <- inner_join(progeny_scores_df, CellsClusters)
    
    ## We summarize the Progeny scores by cellpopulation
    summarized_progeny_scores <- progeny_scores_df %>% 
      group_by(Pathway, CellType) %>%
      summarise(avg = mean(Activity), std = sd(Activity))
    dim(summarized_progeny_scores)
    # [1] 126   4
    
    ## We prepare the data for the plot
    summarized_progeny_scores_df <- summarized_progeny_scores %>%
      dplyr::select(-std) %>%   
      spread(Pathway, avg) %>%
      data.frame(row.names = 1, check.names = FALSE, stringsAsFactors = FALSE) 
    
    2.5 绘图
    paletteLength = 100
    myColor = colorRampPalette(c("Darkblue", "white","red"))(paletteLength)
    
    progenyBreaks = c(seq(min(summarized_progeny_scores_df), 0, 
                          length.out=ceiling(paletteLength/2) + 1),
                      seq(max(summarized_progeny_scores_df)/paletteLength, 
                          max(summarized_progeny_scores_df), 
                          length.out=floor(paletteLength/2)))
    progeny_hmap = pheatmap(t(summarized_progeny_scores_df),fontsize=12, 
                            fontsize_row = 10, 
                            color=myColor, breaks = progenyBreaks, 
                            main = "PROGENy (500)", angle_col = 45,
                            treeheight_col = 0,  border_color = NA)
    
    2.6 绘制单个通路的FeaturePlot
    DefaultAssay(pbmc) <- 'progeny'
    p1= FeaturePlot(pbmc,features = "NFkB", coord.fixed = T, order = T, cols = viridis(10))
    p2=FeaturePlot(pbmc,features = "MAPK", coord.fixed = T, order = T, cols = viridis(10))
    p1|p2
    

    相关文章

      网友评论

        本文标题:单细胞之富集分析-6:PROGENy

        本文链接:https://www.haomeiwen.com/subject/qgjrgrtx.html