前言
hdWGCNA是一个R软件包,用于在单细胞RNA-seq或空间转录组学等高维转录组学数据中执行加权基因共表达网络分析(WGCNA)。hdWGCNA是高度模块化的,可以构建跨多尺度细胞和空间层次的共表达网络。hdWGNCA识别互连基因的健壮模块,并通过各种生物学知识来源提供这些模块的背景。hdWGCNA需要将数据格式化为Seurat对象,这是单细胞数据最普遍的格式之一。
软件包优势:
-
hdWGCNA构建高维转录组学数据中的共表达网络
-
hdWGCNA提供统计、可视化和下游解释工具
-
hdWGCNA是一个使用Seurat数据结构的开源R包
-
hdWGCNA在人类疾病中的应用证明了对复杂数据集的真实分析
![](https://img.haomeiwen.com/i14607083/94c8aa59a48fa520.png)
代码流程
作者推荐构建一个新的R语言环境来供hdWGCNA分析使用。
# create new conda environment for R
conda create -n hdWGCNA -c conda-forge r-base r-essentials
# activate conda environment
conda activate hdWGCNA
紧接着打开R语言,按照依赖包:
Bioconductor, an R-based software ecosystem for bioinformatics and biostatistics.
Seurat, a general-purpose toolkit for single-cell data science.
WGCNA, a package for co-expression network analysis.
igraph, a package for general network analysis and visualization.
devtools, a package for package development in R.
# install BiocManager
install.packages("BiocManager")
# install Bioconductor core packages
BiocManager::install()
# install additional packages:
install.packages(c("Seurat", "WGCNA", "igraph", "devtools"))
随后可以通过下列命令安装hdWGCNA包;
devtools::install_github('smorabit/hdWGCNA', ref='dev')
如果直接按照失败,可能是网络的原因。可以换个时间点挑个网络好的时候安装。也可以通过下载source code,通过运行以下命令安装。
devtools::install_local("hdWGCNA-0.1.1.tar.gz")
当然,如果所有办法都尝试过了,均安装不上,可以通过后台联系我们远程帮你安装。
代码流程
首先就是导入模拟数据,大家也可以用自己的单细胞测序数据。
# single-cell analysis package
library(Seurat)
# plotting and data science packages
library(tidyverse)
library(cowplot)
library(patchwork)
# co-expression network analysis packages:
library(WGCNA)
library(hdWGCNA)
# using the cowplot theme for ggplot
theme_set(theme_cowplot())
# set random seed for reproducibility
set.seed(12345)
# optionally enable multithreading
enableWGCNAThreads(nThreads = 8)
# load the Zhou et al snRNA-seq dataset
seurat_obj <- readRDS('Zhou_2020.rds')
p <- DimPlot(seurat_obj, group.by='cell_type', label=TRUE) +
umap_theme() + ggtitle('Zhou et al Control Cortex') + NoLegend()
p
![](https://img.haomeiwen.com/i14607083/772c19e9c662952f.png)
Set up Seurat object for WGCNA
seurat_obj <- SetupForWGCNA(
seurat_obj,
gene_select = "fraction", # the gene selection approach
fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
wgcna_name = "tutorial" # the name of the hdWGCNA experiment
)
# construct metacells in each group
seurat_obj <- MetacellsByGroups(
seurat_obj = seurat_obj,
group.by = c("cell_type", "Sample"), # specify the columns in seurat_obj@meta.data to group by
reduction = 'harmony', # select the dimensionality reduction to perform KNN on
k = 25, # nearest-neighbors parameter
max_shared = 10, # maximum number of shared cells between two metacells
ident.group = 'cell_type' # set the Idents of the metacell seurat object
)
# normalize metacell expression matrix:
seurat_obj <- NormalizeMetacells(seurat_obj)
Co-expression network analysis
seurat_obj <- SetDatExpr(
seurat_obj,
group_name = "INH", # the name of the group of interest in the group.by column
group.by='cell_type', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups
assay = 'RNA', # using RNA assay
slot = 'data' # using normalized data
)
# Test different soft powers:
seurat_obj <- TestSoftPowers(
seurat_obj,
networkType = 'signed' # you can also use "unsigned" or "signed hybrid"
)
# plot the results:
plot_list <- PlotSoftPowers(seurat_obj)
# assemble with patchwork
wrap_plots(plot_list, ncol=2)
![](https://img.haomeiwen.com/i14607083/243386b75b5a3681.png)
Construct co-expression network
power_table <- GetPowerTable(seurat_obj)
head(power_table)
# construct co-expression network:
seurat_obj <- ConstructNetwork(
seurat_obj, soft_power=9,
setDatExpr=FALSE,
tom_name = 'INH' # name of the topoligical overlap matrix written to disk
)
PlotDendrogram(seurat_obj, main='INH hdWGCNA Dendrogram')
![](https://img.haomeiwen.com/i14607083/fdd24269f28a865d.png)
Module Eigengenes and Connectivity
# need to run ScaleData first or else harmony throws an error:
seurat_obj <- ScaleData(seurat_obj, features=VariableFeatures(seurat_obj))
# compute all MEs in the full single-cell dataset
seurat_obj <- ModuleEigengenes(
seurat_obj,
group.by.vars="Sample"
)
# harmonized module eigengenes:
hMEs <- GetMEs(seurat_obj)
# module eigengenes:
MEs <- GetMEs(seurat_obj, harmonized=FALSE)
# compute eigengene-based connectivity (kME):
seurat_obj <- ModuleConnectivity(
seurat_obj,
group.by = 'cell_type', group_name = 'INH'
)
# rename the modules
seurat_obj <- ResetModuleNames(
seurat_obj,
new_name = "INH-M"
)
# plot genes ranked by kME for each module
p <- PlotKMEs(seurat_obj, ncol=5)
p
![](https://img.haomeiwen.com/i14607083/b82e3e7a42fd8e3d.png)
# get the module assignment table:
modules <- GetModules(seurat_obj)
# show the first 6 columns:
head(modules[,1:6])
# get hub genes
hub_df <- GetHubGenes(seurat_obj, n_hubs = 10)
head(hub_df)
# compute gene scoring for the top 25 hub genes by kME for each module
# with Seurat method
seurat_obj <- ModuleExprScore(
seurat_obj,
n_genes = 25,
method='Seurat'
)
# compute gene scoring for the top 25 hub genes by kME for each module
# with UCell method
library(UCell)
seurat_obj <- ModuleExprScore(
seurat_obj,
n_genes = 25,
method='UCell'
)
Basic Visualization
# make a featureplot of hMEs for each module
plot_list <- ModuleFeaturePlot(
seurat_obj,
features='hMEs', # plot the hMEs
order=TRUE # order so the points with highest hMEs are on top
)
# stitch together with patchwork
wrap_plots(plot_list, ncol=6)
![](https://img.haomeiwen.com/i14607083/b754e562e80987a5.png)
Module Correlations
# plot module correlagram
ModuleCorrelogram(seurat_obj)
![](https://img.haomeiwen.com/i14607083/5a1dd1dc67d65da9.png)
# get hMEs from seurat object
MEs <- GetMEs(seurat_obj, harmonized=TRUE)
mods <- colnames(MEs); mods <- mods[mods != 'grey']
# add hMEs to Seurat meta-data:
seurat_obj@meta.data <- cbind(seurat_obj@meta.data, MEs)
# plot with Seurat's DotPlot function
p <- DotPlot(seurat_obj, features=mods, group.by = 'cell_type')
# flip the x/y axes, rotate the axis labels, and change color scheme:
p <- p +
coord_flip() +
RotatedAxis() +
scale_color_gradient2(high='red', mid='grey95', low='blue')
# plot output
p
![](https://img.haomeiwen.com/i14607083/a71292f0d5339ed0.png)
# Plot INH-M4 hME using Seurat VlnPlot function
p <- VlnPlot(
seurat_obj,
features = 'INH-M12',
group.by = 'cell_type',
pt.size = 0 # don't show actual data points
)
# add box-and-whisker plots on top:
p <- p + geom_boxplot(width=.25, fill='white')
# change axis labels and remove legend:
p <- p + xlab('') + ylab('hME') + NoLegend()
# plot output
p
![](https://img.haomeiwen.com/i14607083/da0639c9ef27f6d5.png)
Referenece:
-
Morabito et al., Cell Reports Methods (2023)
-
Morabito & Miyoshi et al., Nature Genetics (2021)
本文使用 文章同步助手 同步
网友评论