美文网首页单细胞转录组
hdWGCNA系列推文(二):分析单细胞转录组数据

hdWGCNA系列推文(二):分析单细胞转录组数据

作者: 生信宝库 | 来源:发表于2023-06-03 11:24 被阅读0次

    前言

    Immugent在hdWGCNA系列推文第一篇:hdWGCNA:将单细胞和空间转录组的WGCNA分析变成现实中,介绍了hdWGCNA包的主要功能框架,并在上一期推文:hdWGCNA系列推文(一):分析流程的搭建中给大家说明了如何安装hdWGCNA。那么从本期推文开始,正式开始讲解如何在实际分析数据的过程中使用hdWGCNA。

    在正式开始之前呢,我们先来回忆一下WGCNA分析到底是个啥?其实简单总结一下就是一句话:关联表型和基因。WGCNA通过将基因进行分组(module),把基因模块和表型进行关联,实现了快速锁定核心基因的目的。整体来讲,WGCNA的分析流程是很繁琐的,一个全套的分析可能会涉及8-9个步骤,但是,这里面有很多步骤其实无关紧要,跟分析的主线,也就是“筛选与表型相关的核心基因”是脱离的。

    还有一点最主要的是以往的WGCNA分析都是针对bulk测序数据,而hdWGCNA包的开发专门用于单细胞测序数据的使用,而且作者极大的简化了WGCNA分析部分的流程,而添加了很多可视化的流程。废话不多说,下面开始展示。


    代码流程

    首先就是导入模拟数据,大家也可以用自己的单细胞测序数据。

    # single-cell analysis package
    library(Seurat)
    
    # plotting and data science packages
    library(tidyverse)
    library(cowplot)
    library(patchwork)
    
    # co-expression network analysis packages:
    library(WGCNA)
    library(hdWGCNA)
    
    # using the cowplot theme for ggplot
    theme_set(theme_cowplot())
    
    # set random seed for reproducibility
    set.seed(12345)
    
    # optionally enable multithreading
    enableWGCNAThreads(nThreads = 8)
    
    # load the Zhou et al snRNA-seq dataset
    seurat_obj <- readRDS('Zhou_2020.rds')
    
    p <- DimPlot(seurat_obj, group.by='cell_type', label=TRUE) +   umap_theme() + ggtitle('Zhou et al Control Cortex') + NoLegend()
    p
    
    image.png

    Set up Seurat object for WGCNA

    seurat_obj <- SetupForWGCNA(
      seurat_obj,
      gene_select = "fraction", # the gene selection approach
      fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
      wgcna_name = "tutorial" # the name of the hdWGCNA experiment
    )
    
    # construct metacells  in each group
    seurat_obj <- MetacellsByGroups(
      seurat_obj = seurat_obj,
      group.by = c("cell_type", "Sample"), # specify the columns in seurat_obj@meta.data to group by
      reduction = 'harmony', # select the dimensionality reduction to perform KNN on
      k = 25, # nearest-neighbors parameter
      max_shared = 10, # maximum number of shared cells between two metacells
      ident.group = 'cell_type' # set the Idents of the metacell seurat object
    )
    
    # normalize metacell expression matrix:
    seurat_obj <- NormalizeMetacells(seurat_obj)
    

    Co-expression network analysis

    seurat_obj <- SetDatExpr(
      seurat_obj,
      group_name = "INH", # the name of the group of interest in the group.by column
      group.by='cell_type', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups
      assay = 'RNA', # using RNA assay
      slot = 'data' # using normalized data
    )
    
    # Test different soft powers:
    seurat_obj <- TestSoftPowers(
      seurat_obj,
      networkType = 'signed' # you can also use "unsigned" or "signed hybrid"
    )
    
    # plot the results:
    plot_list <- PlotSoftPowers(seurat_obj)
    
    # assemble with patchwork
    wrap_plots(plot_list, ncol=2)
    
    image.png

    Construct co-expression network

    power_table <- GetPowerTable(seurat_obj)
    head(power_table)
    
    # construct co-expression network:
    seurat_obj <- ConstructNetwork(
      seurat_obj, soft_power=9,
      setDatExpr=FALSE,
      tom_name = 'INH' # name of the topoligical overlap matrix written to disk
    )
    
    PlotDendrogram(seurat_obj, main='INH hdWGCNA Dendrogram')
    
    image.png

    Module Eigengenes and Connectivity

    # need to run ScaleData first or else harmony throws an error:
    seurat_obj <- ScaleData(seurat_obj, features=VariableFeatures(seurat_obj))
    
    # compute all MEs in the full single-cell dataset
    seurat_obj <- ModuleEigengenes(
     seurat_obj,
     group.by.vars="Sample"
    )
    
    # harmonized module eigengenes:
    hMEs <- GetMEs(seurat_obj)
    
    # module eigengenes:
    MEs <- GetMEs(seurat_obj, harmonized=FALSE)
    
    # compute eigengene-based connectivity (kME):
    seurat_obj <- ModuleConnectivity(
      seurat_obj,
      group.by = 'cell_type', group_name = 'INH'
    )
    
    # rename the modules
    seurat_obj <- ResetModuleNames(
      seurat_obj,
      new_name = "INH-M"
    )
    
    # plot genes ranked by kME for each module
    p <- PlotKMEs(seurat_obj, ncol=5)
    
    p
    
    image.png
    # get the module assignment table:
    modules <- GetModules(seurat_obj)
    
    # show the first 6 columns:
    head(modules[,1:6])
    
    # get hub genes
    hub_df <- GetHubGenes(seurat_obj, n_hubs = 10)
    
    head(hub_df)
    
    # compute gene scoring for the top 25 hub genes by kME for each module
    # with Seurat method
    seurat_obj <- ModuleExprScore(
      seurat_obj,
      n_genes = 25,
      method='Seurat'
    )
    
    # compute gene scoring for the top 25 hub genes by kME for each module
    # with UCell method
    library(UCell)
    seurat_obj <- ModuleExprScore(
      seurat_obj,
      n_genes = 25,
      method='UCell'
    )
    

    Basic Visualization

    # make a featureplot of hMEs for each module
    plot_list <- ModuleFeaturePlot(
      seurat_obj,
      features='hMEs', # plot the hMEs
      order=TRUE # order so the points with highest hMEs are on top
    )
    
    # stitch together with patchwork
    wrap_plots(plot_list, ncol=6)
    
    image.png

    Module Correlations

    # plot module correlagram
    ModuleCorrelogram(seurat_obj)
    
    image.png
    # get hMEs from seurat object
    MEs <- GetMEs(seurat_obj, harmonized=TRUE)
    mods <- colnames(MEs); mods <- mods[mods != 'grey']
    
    # add hMEs to Seurat meta-data:
    seurat_obj@meta.data <- cbind(seurat_obj@meta.data, MEs)
    
    # plot with Seurat's DotPlot function
    p <- DotPlot(seurat_obj, features=mods, group.by = 'cell_type')
    
    # flip the x/y axes, rotate the axis labels, and change color scheme:
    p <- p +
      coord_flip() +
      RotatedAxis() +
      scale_color_gradient2(high='red', mid='grey95', low='blue')
    
    # plot output
    p
    
    image.png
    # Plot INH-M4 hME using Seurat VlnPlot function
    p <- VlnPlot(
      seurat_obj,
      features = 'INH-M12',
      group.by = 'cell_type',
      pt.size = 0 # don't show actual data points
    )
    
    # add box-and-whisker plots on top:
    p <- p + geom_boxplot(width=.25, fill='white')
    
    # change axis labels and remove legend:
    p <- p + xlab('') + ylab('hME') + NoLegend()
    
    # plot output
    p
    
    image.png

    说在最后

    整体来说,hdWGCNA分析的这个过程并不复杂,就是把已经构建好的Seurat对象中每群细胞的特征模块提取出来。从上面的结果可以看出,hdWGCNA对一些细胞群的特征提取还是很特异的,但是其中对某几群细胞的特征提取并没有成功。导致这种结果的原因有很多,比如这群细胞数量多少的问题,以及测序质量的问题,亦或者其中有几群细胞本身就很相似。但是我们也不能太挑剔,所有的优质算法都是一步步不断完善的,希望hdWGCNA能出下一个版本,并且能更好的解决这些短板。

    好啦,本期分享到这里就结束了,我们下期再会~~

    相关文章

      网友评论

        本文标题:hdWGCNA系列推文(二):分析单细胞转录组数据

        本文链接:https://www.haomeiwen.com/subject/fxqvedtx.html