美文网首页生信文献rna_seqscRNA-seq
复现2:AD与Normal细胞类型水平的差异基因挖掘(snRNA

复现2:AD与Normal细胞类型水平的差异基因挖掘(snRNA

作者: 小贝学生信 | 来源:发表于2021-08-24 23:02 被阅读0次
    • Single-nucleus transcriptome analysis reveals dysregulation of angiogenic endothelial cells and neuroprotective glia in Alzheimer's disease
    • PMID: 32989152 | 2020 Oct 13
    • IF : 11.202 | Proc Natl Acad Sci USA(PNAS)
    overall design
    • 文献思路:对AD和Normal来源的前额皮质组织进行snRNA-seq;进行细胞类型注释,然后基于细胞类型的水平,对AD和Normal进行差异分析。
      1-3


      Paper1B-D 细胞类型注释

      2-3


      Paper2A-D 基因细胞类型,AD与Normal的比较
      3-3
      Paper3A-D 细胞类型的深入挖掘(以Astro为例)
    • 图表复现思路
      (1)原始数据预处理,读为Seurat对象
      (2)质控、过滤
      (3)标准化、高变基因、归一化、降维(PCA、UMAP)、分群
      (4)细胞类型注释
      (5)基于细胞类型的AD/NC差异分析
      (6)以Astro为例的细胞再聚类深入分析

    提示:因为本次数据集涉及17W~细胞,所以使用的是服务器Rstudio,本地电脑比较困难。

    1、读入数据

    1.1 下载数据(GSE157827)--上传到服务器--批量改名(Read10X())--创建Seurat对象

    • 也可以在服务器中,直接使用 wget命令下载:https://ftp.ncbi.nlm.nih.gov/geo/series/GSE157nnn/GSE157827/suppl/
    setwd("~/scAD")
    fs=list.files('./GSE157827_RAW/','^GSM')
    fs
    library(tidyverse)
    samples=str_split(fs,'_',simplify = T)[,1]
    #批量将文件名改为 Read10X()函数能够识别的名字
    lapply(unique(samples),function(x){
      # x = unique(samples)[1]
      y=fs[grepl(x,fs)]
      folder=paste0("GSE157827_RAW/", paste(str_split(y[1],'_',simplify = T)[,1:2], collapse = "_"))
      dir.create(folder,recursive = T)
      #为每个样本创建子文件夹
      file.rename(paste0("GSE157827_RAW/",y[1]),file.path(folder,"barcodes.tsv.gz"))
      #重命名文件,并移动到相应的子文件夹里
      file.rename(paste0("GSE157827_RAW/",y[2]),file.path(folder,"features.tsv.gz"))
      file.rename(paste0("GSE157827_RAW/",y[3]),file.path(folder,"matrix.mtx.gz"))
    })
    library(Seurat)
    samples=list.files("GSE157827_RAW/")
    samples
    dir <- file.path('./GSE157827_RAW',samples)
    names(dir) <- samples
    #创建Seurat对象
    counts <- Read10X(data.dir = dir)
    scRNA = CreateSeuratObject(counts)
    dim(scRNA)   #查看基因数和细胞总数
    #[1]  33538 179392
    table(scRNA@meta.data$orig.ident)  #查看每个样本的细胞数
    head(scRNA@meta.data)
    

    1.2 metadata细节整理

    scRNA$orig.ident = stringr::str_split(rownames(scRNA@meta.data), "_",simplify = T)[,2]
    # AD、NC
    scRNA$group = substr(scRNA$orig.ident,1,2)
    head(scRNA@meta.data)
    table(scRNA$group)
    table(scRNA$orig.ident)
    #为了绘图直观,factor因子化,并且设置指定的levels排序
    ranks = order(as.numeric(substr(unique(scRNA$orig.ident),3,4)))
    scRNA$orig.ident = factor(scRNA$orig.ident, levels = unique(scRNA$orig.ident)[ranks])
    table(scRNA$orig.ident, scRNA$group)
    

    2、质控、过滤低质量的细胞、基因

    2.1 过滤前的指标可视化

    #指标1:nFeature_RNA 表达基因数
    feats <- c("nFeature_RNA", "nCount_RNA")
    library(patchwork)
    p_filt_b_1=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 2, 
            group.by = "group") + NoLegend()
    p_filt_b_2=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 1, 
            group.by = "orig.ident") + NoLegend()
    #指标2~3:线粒体、核糖体基因含量
    mito_genes=rownames(scRNA)[grep("^MT-", rownames(scRNA))] ;mito_genes 
    scRNA=PercentageFeatureSet(scRNA, "^MT-", col.name = "percent_mito")
    fivenum(scRNA@meta.data$percent_mito)
    ribo_genes=rownames(scRNA)[grep("^Rp[sl]", rownames(scRNA),ignore.case = T)];ribo_genes
    scRNA=PercentageFeatureSet(scRNA, "^RP[SL]", col.name = "percent_ribo")
    fivenum(scRNA@meta.data$percent_ribo)
    feats <- c("percent_mito","percent_ribo")
    p_filt_b_3 =VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 2, 
            group.by = "group") + NoLegend()
    p_filt_b_4=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 1, 
            group.by = "orig.ident") + NoLegend()
    (p_filt_b_1 / p_filt_b_2) | (p_filt_b_3 / p_filt_b_4)
    

    2.2 确定过滤细胞、基因的阈值

    • 细胞的相关阈值
    #细胞-表达基因数-筛掉过高、过低
    retained_c_umi_low <- scRNA$nFeature_RNA > 300
    retained_c_umi_high <- scRNA$nFeature_RNA < 8000
    #细胞-线粒体/核糖体含量-筛掉过低
    retained_c_mito <- scRNA$percent_mito < 14
    retained_c_ribo <- scRNA$percent_ribo < 3
    table(retained_c_mito & retained_c_ribo & retained_c_umi_low & retained_c_umi_high)
    # FALSE   TRUE 
    # 9876 169516
    table(scRNA$group[retained_c_mito & retained_c_ribo & retained_c_umi_low & retained_c_umi_high])
    # AD    NC 
    # 90480 79036
    
    #指标可视化
    #devtools::install_github("gaospecial/ggVennDiagram")
    veen_eles = list(`Filt_low_umi\n(300)` = umi_low,
                     `Filt_high_umi\n(8000)` = umi_high,
                     `Filt_high_mito\n(14%)` = mito,
                     `Filt_high_ribo\n(3%)` = ribo)
    library("ggVennDiagram")
    ggVennDiagram(veen_eles, set_size = 4)  +
      ggplot2::scale_fill_gradient(low="#ffffb2",high = "#f03b20")
    

    值得注意的是:snRNA-seq与scRNA-seq分析过程中最主要的不同便在于线粒体与核糖体比例。因为前者测的仅为核基因数据,所以理论上很少有核糖体、线粒体相关基因,因此需要过滤掉高表达核糖体、线粒体相关基因。

    • 基因的相关阈值
    feature_rowsum = Matrix::rowSums(scRNA@assays$RNA@counts>0) 
    head(feature_rowsum)
    retained_f_low <- feature_rowsum > ncol(scRNA)*0.005
    table(retained_f_low)
    # FALSE  TRUE 
    # 16262 17276
    rankplot = data.frame(feature_count = sort(feature_rowsum),
                              gene = names(sort(retained_f_low)),
                              Rank = 1:length(feature_rowsum))
    library(ggbreak)
    ggplot(rankplot, aes(x=Rank, y=feature_count)) + 
      geom_point() + scale_y_break(c(10000,100000)) +
      geom_hline(yintercept=ncol(scRNA)*0.005, color="red") +
      geom_text(x=10000, y=4000, label="Feature cutoff : ncol(scRNA)*0.5%", size = 5)
    
    • 根据上面的阈值进行过滤
    scRNA_filt = scRNA[retained_f_low, retained_c_mito & retained_c_ribo & 
                                        retained_c_umi_low & retained_c_umi_high]
    dim(scRNA_filt)
    #[1]  17276 169516
    table(scRNA_filt$group)
    # AD    NC 
    # 90480 79036
    

    2.3 过滤后的指标可视化

    feats <- c("nFeature_RNA", "nCount_RNA")
    p_filt_a_1=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 2, 
            group.by = "group") + NoLegend()
    p_filt_a_2=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 1, 
            group.by = "orig.ident") + NoLegend()
    feats <- c("percent_mito","percent_ribo")
    p_filt_a_3=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 2, 
            group.by = "group") + NoLegend()
    p_filt_a_4=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 1, 
            group.by = "orig.ident") + NoLegend()
    (p_filt_a_1 / p_filt_a_2) | (p_filt_a_3 / p_filt_a_4)
    

    2.4 保存过滤后的数据(optional)

    # saveRDS(scRNA_filt, file = "sce_filt.rds")
    # scRNA = readRDS("sce_filt.rds")
    

    3、标准化、高变基因、归一化、降维(PCA、UMAP)、分群

    3.1 标准化、高变基因、归一化

    • 由于数据量比较大,ScaleData()函数比较耗时,尤其是加上回归参数。可使用future包的plan()函数设置线程数,详见https://satijalab.org/seurat/archive/v3.0/future_vignette.html
    • 推荐使用四个线程,此外如果在FindIntegrationAnchors()步骤使用报错,设置options(future.globals.maxSize = 1000 * 1024^2) #1G plan()即可
    library(future)
    plan()
    plan("multiprocess", workers = 4)
    plan()
    
    scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
    scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 2000) 
    scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA), 
                       vars.to.regress = c("nFeature_RNA","percent_mito"))
    

    3.2 降维、分群

    scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA)) 
    scRNA = RunUMAP(scRNA, dims = pc.num)
    #根据group,展示dimplot
    p_dim = DimPlot(scRNA, group.by = "group")
    scRNA <- FindNeighbors(scRNA, dims = pc.num) 
    scRNA <- FindClusters(scRNA, resolution = c(0.01,0.05,0.1,0.2,0.5))
    #不同分辨率的分群效果
    p_cutree = clustree(scRNA@meta.data, prefix = "RNA_snn_res.")
    Idents(scRNA) = scRNA$RNA_snn_res.0.05
    table(scRNA@active.ident)
    p_dim_1 = DimPlot(scRNA, label = T)
    
    p_dim | p_cutree | p_dim_1
    

    4、细胞类型注释

    4.1 逐一展示每个细胞类型的marker gene dotplot,注释cluster

    # astrocytes: AQP4, ADGRV1, GPC5, RYR3
    # endothelial cells: CLDN5, ABCB1, EBF1
    # excitatory neurons: CAMK2A, CBLN2, LDB2
    # inhibitory neurons: GAD1, LHFPL3, PCDH15
    # microglia: C3, LRMDA, DOCK8
    # oligodendrocytes: MBP, PLP1, ST18
    astrocytes = c("AQP4", "ADGRV1", "GPC5", "RYR3")
    DotPlot(scRNA, features = astrocytes, 
            assay = "RNA") # 2
    
    endothelial = c("CLDN5", "ABCB1", "EBF1")
    DotPlot(scRNA, features = endothelial, 
            assay = "RNA") # 10
    
    excitatory = c("CAMK2A", "CBLN2", "LDB2")
    DotPlot(scRNA, features = excitatory, 
            assay = "RNA") # 0,6,8,9,11
    
    inhibitory = c("GAD1", "LHFPL3", "PCDH15")
    DotPlot(scRNA, features = inhibitory, 
            assay = "RNA") # 3,4,5
    
    microglia = c("C3", "LRMDA", "DOCK8")
    DotPlot(scRNA, features = microglia, 
            assay = "RNA") # 7
    
    oligodendrocytes = c("MBP", "PLP1", "ST18")
    DotPlot(scRNA, features = oligodendrocytes, 
            assay = "RNA") # 1
    
    • 合并dotplot
    gene_list = list(
      Astro = astrocytes,
      Endo = endothelial,
      Excit = excitatory,
      Inhib = inhibitory,
      Mic = microglia,
      Oligo = oligodendrocytes
    )
    # 调整cluster的level因子顺序,从而较美观的展示dotplot
    scRNA@active.ident = factor(scRNA@active.ident, levels = c(2,10,0,6,8,9,11,3,4,5,7,1))
    p_dot_1 = DotPlot(scRNA, features = gene_list, 
            assay = "RNA") +
      theme(axis.text.x = element_text(angle = 60, vjust = 0.8, hjust=0.8))
    
    p_dim_1 | p_dot_1
    

    4.2 根据上述结果,注释细胞类型

    scRNA$celltype = ifelse(scRNA@active.ident %in% c(2), "Astro",
                            ifelse(scRNA@active.ident %in% c(10), "Endo",
                                   ifelse(scRNA@active.ident %in% c(7), "Mic",
                                          ifelse(scRNA@active.ident %in% c(1), "Oligo",
                                                 ifelse(scRNA@active.ident %in% c(3,4,5), "Inhib",
                                                        "Excit")))))
    table(scRNA$celltype)
    Idents(scRNA) = scRNA$celltype
    scRNA@active.ident = factor(scRNA@active.ident, levels = c("Astro","Endo","Excit","Inhib","Mic","Oligo"))
    table(scRNA@active.ident)
    p_dot_2= DotPlot(scRNA, features = gene_list, 
                 assay = "RNA") +
      theme(axis.text.x = element_text(angle = 60, vjust = 0.8, hjust=0.8))
    p_dim_2= DimPlot(scRNA, label = T)
    p_dim_2 | p_dot_2 
    

    4.3 复现paper1B-D

    #Fig 1B
    fig_1b=DimPlot(scRNA,  cols = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02'))
    
    #Fig 1C
    ct_stat = table(scRNA$celltype)
    ct_stat = data.frame(celltype = names(ct_stat),
                         total = as.numeric(ct_stat),
                         percentage = ct_stat/sum(ct_stat))
    library(ggplot2)
    fig_1c=ggplot(ct_stat, aes(x=celltype, fill=celltype, y = percentage.Freq)) +
      geom_bar(stat="identity",color = "black")  + NoLegend() +
      scale_fill_manual(values=c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')) +
      theme_classic() + theme(axis.title.x = element_blank()) +
      ylab("Proportion of total nuclei(%)")
    
    #Fig 1D
    #Downsample 5000个细胞做Fingallmarker从而节省时间
    table(scRNA@active.ident)
    CTs = as.character(unique(scRNA@active.ident))
    sampled = unlist(lapply(CTs, function(x){
                #x = CTs[1]
                index = scRNA@active.ident %in% x
                cells = names(scRNA@active.ident[index])
                if(length(cells) > 5000){
                  sle_cell = sample(cells, 5000)
                } else {
                  sle_cell = cells
                }
              }))
    
    scRNA_sample = scRNA[ ,sampled]
    dim(scRNA_sample)
    table(scRNA_sample@active.ident)
    #diff.wilcox = FindAllMarkers(scRNA_sample)
    library(tidyverse)
    all.markers = diff.wilcox %>% select(gene, everything()) %>%
      subset(p_val<0.05 & abs(diff.wilcox$avg_log2FC) > 0.5)
    top5 = all.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
    top5 = CaseMatch(search = as.vector(top5$gene), match = rownames(scRNA_sample)) 
    #save(top5, file = "top5.rda")
    #load("top5.rda")
    fig_1d = DoHeatmap(scRNA_sample, features = top5, angle = 0,
                       group.colors = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')) 
    
    #合并
    ((fig_1b + labs(tag = "B"))+ (fig_1c +  labs(tag = "C"))  ) / (fig_1d +  labs(tag = "D"))
    

    5、基于细胞类型的AD/NC差异分析(Fig 5A-D复现)

    #Fig 2a
    fig_2a=DimPlot(scRNA, split.by = "group", cols = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02'))
    
    #Fig 2b
    ct_stat2 = as.data.frame(table(scRNA$celltype, scRNA$group))
    sums = rep(c(sum(ct_stat2$Freq[1:6]),sum(ct_stat2$Freq[7:12])),each=6)
    ct_stat2$percentage = ct_stat2$Freq/sums
    
    ct_stat2$Var2 = factor(ct_stat2$Var2, levels = c("NC","AD"))
    library(ggplot2)
    fig_2b=ggplot(ct_stat2, aes(x=Var1, fill=Var2, y = percentage)) +
      geom_bar( position=position_dodge(width=0.8), width=0.6,
                stat="identity",color = "black")  + 
      scale_fill_manual(values=c('white','black')) +
      theme_classic() + ylab("Proportion of total nuclei(%)") + 
      theme(axis.title.x = element_blank(), legend.title = element_blank())
    
    • 每种细胞类型的差异分析
    library(future)
    plan()
    plan("multiprocess", workers = 4)
    plan()
    
    head(scRNA@meta.data)
    head(scRNA@meta.data[,c("group","celltype")])
    scRNA$compare = paste(scRNA$group, scRNA$celltype, sep = "_")
    table(scRNA$compare)
    # AD_Astro  AD_Endo AD_Excit AD_Inhib   AD_Mic AD_Oligo 
    # 10101     1621    32277    19271     4209    23001 
    # NC_Astro  NC_Endo NC_Excit NC_Inhib   NC_Mic NC_Oligo 
    # 8782      598    30050    18021     3714    17871 
    
    ct = levels(scRNA@active.ident)
    all_markers = lapply(ct, function(x){
                  # x = ct[1]
                  print(x)
                  markers <- FindMarkers(scRNA, group.by = "compare",
                                         logfc.threshold = 0.1,
                                         ident.1 = paste("AD",x,sep = "_"),
                                         ident.2 = paste("NC",x,sep = "_"))
                  #markers_sig <- subset(markers, p_val_adj < 0.1)
                  return(markers)
                })
    length(all_markers)
    names(all_markers) = ct
    lapply(all_markers,nrow)
    
    • 根据文章提供的阈值,确认差异基因:
      compared the individual cell-type between AD and NC samples.
      adjusted P < 0.1, log 2 fold change ≥ 0.1 or ≤ −0.1
    all_markers_sig = lapply(all_markers, function(x){
      markers_sig <- subset(x, p_val_adj < 0.1)
      #markers_sig <- subset(x, p_val < 0.0001)
    })
    marker_stat = as.data.frame(lapply(all_markers_sig,function(x){
      # x=all_markers_sig[[1]]
      Up = sum(x$avg_log2FC>0)
      Down = sum(x$avg_log2FC<0)
      Total = length(x$avg_log2FC)
      return(c(Up, Down, Total))
    }))
    rownames(marker_stat) = c("Up","Down","Total")
    marker_stat
    library(gridExtra)
    fig_2c = ggpubr::as_ggplot(tableGrob(marker_stat, theme = ttheme_default(base_size = 25)))
    
    • Fig 2D Veen图
      难点:venn.diagram转为ggplot对象,以便于拼图
    names(all_markers_sig)
    Astro = rownames(all_markers_sig$Astro)
    Endo = rownames(all_markers_sig$Endo)
    `Excit+Inhib` = unique(c(rownames(all_markers_sig$Excit),
                      rownames(all_markers_sig$Inhib)))
    Mic = rownames(all_markers_sig$Mic)
    Oligo = rownames(all_markers_sig$Oligo)
    veen_eles2 = list(Astro=Astro, Endo=Endo, 
                      `Excit+Inhib`=`Excit+Inhib`,
                      Mic=Mic, Oligo=Oligo)
    library(VennDiagram)
    library(ggplotify)
    library(gridGraphics)
    plt=venn.diagram(veen_eles2, filename=NULL)
    p_tmp <- grobTree(plt)
    fig_2d=as.ggplot(plot_grid(p_tmp))
    
    • 合并
    (fig_2a + fig_2b)/(fig_2c + fig_2d) + plot_annotation(tag_levels = 'A')
    

    6、以Astro为例的细胞再聚类深入分析(Fig 4A-D)

    • Astro细胞类型的clsuter再聚类→分为三大类:
      (1) Astro细胞的AD上调基因相对高表达的clusters
      (2) Astro细胞的AD下调基因相对高表达的clusters
      (3) 其它clusters
      对前两大类cluster进行差异分析,以及富集分析(有点绕,可参看原文)
    • 首先对Astro细胞类型的clsuter,重新分析
    library(future)
    plan()
    plan("multiprocess", workers = 4)
    plan()
    #subcluster analysis
    scRNA_astro = subset(scRNA, celltype == "Astro")
    dim(scRNA_astro)
    scRNA_astro <- NormalizeData(scRNA_astro, normalization.method = "LogNormalize", scale.factor = 10000)
    scRNA_astro <- FindVariableFeatures(scRNA_astro, selection.method = "vst", nfeatures = 2000) 
    scRNA_astro <- ScaleData(scRNA_astro, features = VariableFeatures(scRNA_astro), 
                       vars.to.regress = c("nFeature_RNA","percent_mito"))
    scRNA_astro <- RunPCA(scRNA_astro, features = VariableFeatures(scRNA_astro)) 
    ElbowPlot(scRNA_astro, ndims=30, reduction="pca") 
    pc.num=1:30
    scRNA_astro = RunUMAP(scRNA_astro, dims = pc.num)
    DimPlot(scRNA_astro, group.by = "group")
    DimPlot(scRNA_astro, split.by = "group")
    
    scRNA_astro <- FindNeighbors(scRNA_astro, dims = pc.num)
    scRNA_astro <- FindClusters(scRNA_astro, resolution = c(0.01,0.05,0.1,0.2,0.3,0.4,0.5))
    clustree(scRNA_astro@meta.data, prefix = "RNA_snn_res.")
    #选择resolution 0.3的分群结果
    scRNA_astro$sle_reso = as.numeric(scRNA_astro$RNA_snn_res.0.3)
    scRNA_astro$sle_reso = paste0("a",scRNA_astro$sle_reso)
    table(scRNA_astro$sle_reso)
    scRNA_astro$sle_reso = factor(scRNA_astro$sle_reso, levels = paste0("a",1:12))
    Idents(scRNA_astro) = scRNA_astro$sle_reso
    table(scRNA_astro@active.ident)
    scRNA_astro$group = factor(scRNA_astro$group, levels = c("NC","AD"))
    fig_4a=DimPlot(scRNA_astro, split.by = "group")
    p_sub_dim = DimPlot(scRNA_astro, label = T)
    
    • 根据上述结果,以及对应paper,分为3大类细胞
    scRNA_astro$subpop = ifelse(scRNA_astro@active.ident %in% c("a1","a5"), "Up",
                                ifelse(scRNA_astro@active.ident %in% c("a4","a3","a11"), 
                                       "Down","No change"))
    table(scRNA_astro$subpop)
    scRNA_astro$subpop = factor(scRNA_astro$subpop, levels = c("Up","Down","No change"))
    fig_4b=DimPlot(scRNA_astro, group.by = "subpop", cols = c("#e41a1c","#4daf4a","#d9d9d9"))
    
    • subcluster DEG
    marker_Astro <- FindMarkers(scRNA_astro, group.by = "subpop",
                           ident.1 = "Down",
                           ident.2 = "Up")
    marker_Astro = marker_Astro[order(marker_Astro$avg_log2FC, decreasing = T),]
    head(marker_Astro)
    scRNA_astro_sub = subset(scRNA_astro, subpop != "No change")
    scRNA_astro_sub$subpop = factor(scRNA_astro_sub$subpop, levels = c("Down","Up"))
    library(future)
    plan()
    plan("multiprocess", workers = 4)
    plan()
    scRNA_astro_sub <- ScaleData(scRNA_astro_sub, features = rownames(marker_Astro), 
                       vars.to.regress = c("nFeature_RNA","percent_mito"))
    
    #Doheatmap结果中,只展示部分感兴趣(与paper重合)的基因注释
    paper = c("GLUD1","GLUL","PTN","TNIK","SLC4A4","GABRA2","VEGFA","ETNPPL","SLC1A3",
              "TUBB2B","SPARCL1","HES1","WIF1","HES5","APOE","SLC1A2","F3")
    table(paper %in% rownames(marker_Astro)[1:100])
    genes.to.label = paper[paper %in% rownames(marker_Astro)[1:100]]
    labels <- rep(x = "transparent", times = length(x = rownames(marker_Astro)[1:100]))
    labels[match(x = genes.to.label, table = rownames(marker_Astro)[1:100])] <- "black"
    fig_4c=DoHeatmap(object = scRNA_astro_sub, features = rownames(marker_Astro)[1:100], 
              group.by = "subpop", group.colors = c("#4daf4a","#e41a1c"), angle = 0) +
      theme(axis.text.y = element_text(color = rev(x = labels)))
    fig_4c
    
    • 差异基因的富集分析结果
    library(clusterProfiler)
    library(org.Hs.eg.db)
    ego <- enrichGO(gene          = rownames(marker_Astro)[1:200],
                    keyType       = "SYMBOL",
                    OrgDb         = org.Hs.eg.db,
                    ont           = "BP",
                    pAdjustMethod = "BH",
                    pvalueCutoff  = 1,
                    qvalueCutoff  = 1)
    res = ego@result
    interest_go = which(rownames(ego@result) %in% c("GO:0021953","GO:0050769","GO:0021872","GO:0021879",
                    "GO:0007409","GO:0042552","GO:0008366","GO:0030900")
    interest_go=ego@result$Description[interest_go]
    fig_4d=barplot(ego, showCategory=interest_go, cols="pvalue") 
    
    • 合并
    (fig_4a + fig_4b)/(fig_4c + fig_4d) + plot_layout(widths = c(2,1)) + 
      plot_annotation(tag_levels = 'A')
    

    相关文章

      网友评论

        本文标题:复现2:AD与Normal细胞类型水平的差异基因挖掘(snRNA

        本文链接:https://www.haomeiwen.com/subject/rpljiltx.html