WGCNA analysis

作者: wkzhang81 | 来源:发表于2023-11-08 11:55 被阅读0次

转录组测序技术和结果解读（十三）——WGCNA分析
【科研猫】WGCNA共表达网络分析（1）
【R语言】WGCNA分析blockwiseModules函数报错
WGCNA原理和分析流程
WGCNA
R语言安装WGCNA，碰到依赖包无法安装的情况
WGCNA 学习
WGCNA分析 | 你的数据结果真的是准确的吗？？
就拿TCGA的乳腺癌RNA-seq数据来做个WGCNA示例吧！
使用WGCNA进行meta-analysis

The official website for WGCNA package:

GitHub - cran/WGCNA: :exclamation: This is a read-only mirror of the CRAN R package repository. WGCNA — Weighted Correlation Network Analysis. Homepage: http://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/

Instructive descriptions on WGCNA in Chinese:

加权基因共表达网络分析 (WGCNA）| 共表达网络模块构建 |生物信息学数据分析必备技能 (zhihu.com)
WGCNA分析，简单全面的最新教程 - 简书 (jianshu.com)
WGCNA是我最拿手的教程 | 生信菜鸟团 (bio-info-trainee.com)

A jupyter-notebook-based WGCNA procedure:

Package loading

p_load(tidyverse, WGCNA, repr, reshape2, pheatmap)
options(repr.plot.res=150)
options(stringsAsFactors = FALSE)
enableWGCNAThreads()

# Output
Allowing parallel execution with up to 15 working processes.

Data loading

exprMat <- "tpm_matrix_WGCNA.csv"
type <- "unsigned"
  #"signed" could be chosen alternatively;
corType <- "pearson"
  #"bicor" could be chosen alternatively;
maxPOutliers <- ifelse(corType=="pearson",1,0.05)
robustY <- ifelse(corType=="pearson",T,F)
dataExpr <- read.csv(exprMat, row.names = 1)
# dataExpr <- filter_all(dataExpr, any_vars(. > 1))
  # optional
  # filter the dataExpr could reduce the time and source required for the analysis
dataExpr <- as.data.frame(t(dataExpr))
dim(dataExpr)

# Output
nSample   nGene

Data filtering

gsg = goodSamplesGenes(dataExpr, verbose = F)
dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
nGenes = ncol(dataExpr)
nSamples = nrow(dataExpr)
dim(dataExpr)

# Output
nSample   filtered_nGene

Soft threshold picking

powers <- c(c(1:10), seq(from = 12, to=30, by=2))
sft <- pickSoftThreshold(dataExpr, powerVector = powers, networkType=type, verbose=5)

options(repr.plot.height=4, repr.plot.width=8)
par(mfrow = c(1,2))
cex1 <- 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",
     ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red")
# Selection standard: R-square=0.85
abline(h=0.85,col="red")

plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, 
     cex=cex1, col="red")

R-square table

Scale independence and connectivity

Choose the right power (soft threshold) for the following analysis

power <- sft$powerEstimate
if (power > 6) { power <- 6 } else { power <- power }
power
  # soft threshold should not exceed 6 for unsigned network and
    # 12 for signed network, according to the author of WGCNA.

# Output
3
  #In this example, power = 3 is already enough to make R-square > 0.85

Construction of TOM matrix
Caution: this step is extremely time and resource consuming.

net <- blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
                       TOMType = type, minModuleSize = 30,
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs=TRUE, corType = corType, 
                       maxPOutliers=maxPOutliers, loadTOMs=TRUE,
                       saveTOMFileBase = paste0(exprMat, ".tom"),
                       verbose = 3)
  # the **TOM matrix** will be stored with the tpm_matrix file as "tpm_matrix_WGCNA.csv.tom-block.1.RData".
net <- saveRDS("net.rds")
  # the constructed **net** file will be stored as "net.rds".
### Next time, these files could be directly imported into R environment for further analysis without re-calculation.
### net <- readRDS("net.red")

Cluster dendrogram

# Convert labels to colors for plotting
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
# Plot the dendrogram and the module colors underneath
options(repr.plot.height = 6, repr.plot.width = 12)
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
table(moduleColors)

table of module colors

cluster dendrogram with module colors

Network heatmap plot for selected genes

load(net$TOMFiles[1], verbose = F)
TOM <- as.matrix(TOM)
dissTOM <- 1-TOM
nSelect <- 500 # randomly select 500 genes
set.seed(666)
select <- sample(nGenes, size = nSelect)
selectTOM <- dissTOM[select, select]
selectTree <- hclust(as.dist(selectTOM), method = "average")
selectColors <- moduleColors[select]
plotDiss <- selectTOM ^ 7
diag(plotDiss) <- NA
TOMplot(plotDiss, selectTree, selectColors, col = colorRampPalette(c("red","orange","lemonchiffon"))(256), main = "Network heatmap plot, selected genes")

Network heatmap plot

module eigengene expression pattern

MEs = net$MEs
MEs_col = MEs
colnames(MEs_col) = paste0("ME", labels2colors(
  as.numeric(str_replace_all(colnames(MEs),"ME",""))))
MEs_col = orderMEs(MEs_col)
options(repr.plot.height=6, repr.plot.width=8)
pheatmap(t(MEs_col), border = F, 
         col = colorRampPalette(c("blue", "white", "red"))(256), 
         cluster_cols = F, gaps_col = c(3,6,9,12), angle_col =45, 
         scale = "row")
table(moduleColors)

Eigengene expression heatmap

Eigengene and phenotype adjacency heatmap

options(repr.plot.res=150, repr.plot.height=8, repr.plot.width=8)
# plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap", 
#                        marDendro = c(3,3,2,4),
#                        marHeatmap = c(3,4,2,2), plotDendrograms = T, 
#                        xLabelsAngle = 90)
  # Eigengene adjacency without phenotype.

trait <- "phenotype.csv"
if(trait != "") {
  traitData <- read.csv(file=trait, row.names=1)
MEs_colpheno = orderMEs(cbind(MEs_col, traitData))
plotEigengeneNetworks(MEs_colpheno, "Eigengene adjacency heatmap", 
                      marDendro = c(3,3,2,4),
                      marHeatmap = c(3,4,2,2), plotDendrograms = T, 
                      xLabelsAngle = 90)
 # Eigengene adjacency with phenotype.

Eigengene adjacency heatmap

Sample dendrogram and phenotype heatmap

dataExpr_tree <- hclust(dist(dataExpr), method = "average")
par(mar = c(0,5,2,0))

#plot(dataExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
#     cex.axis = 1, cex.main = 1,cex.lab = 1)
# sample_colors <- numbers2colors(as.numeric(factor(datTraits$Tumor.Type)), 
#                                colors = c("white","blue","red","green"),signed = FALSE)
  # for qualitative phenotypes.

sample_colors <- numbers2colors(traitRows, signed = FALSE)
par(mar = c(1,4,3,1),cex=0.8)
plotDendroAndColors(dataExpr_tree, sample_colors,
                    groupLabels = colnames(sample),
                    cex.dendroLabels = 0.8,
                    marAll = c(1, 4, 3, 1),
                    cex.rowText = 0.01,
                    main = "Sample dendrogram and phenotype heatmap")
  # for quantitative phenotypes

Sample dendrogram and phenotype heatmap

Module-phenotype relationships

options(repr.plot.height = 12)
if (corType=="pearson") {
  modTraitCor = cor(MEs_col, traitData, use = "p")
  modTraitP = corPvalueStudent(modTraitCor, nSamples)
} else {
  modTraitCorP = bicorAndPvalue(MEs_col, traitData, robustY=robustY)
  modTraitCor = modTraitCorP$bicor
  modTraitP   = modTraitCorP$p
}

textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")
dim(textMatrix) = dim(modTraitCor)
labeledHeatmap(Matrix = modTraitCor, xLabels = colnames(traitData), 
               yLabels = colnames(MEs_col), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs_col), colorLabels = FALSE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix, setStdMargins = FALSE, 
               cex.text = 0.5, zlim = c(-1,1),
               main = paste("Module-trait relationships"))

Module-phenotype relationships

MM and GS score analysis on certain module

if (corType=="pearson") {
  geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
  MMPvalue = as.data.frame(corPvalueStudent(
             as.matrix(geneModuleMembership), nSamples))
} else {
  geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
  geneModuleMembership = geneModuleMembershipA$bicor
  MMPvalue = geneModuleMembershipA$p
}

if (corType=="pearson") {
  geneTraitCor = as.data.frame(cor(dataExpr, traitData, use = "p"))
  geneTraitP = as.data.frame(corPvalueStudent(as.matrix(geneTraitCor), nSamples))
} else {
  geneTraitCorA = bicorAndPvalue(dataExpr, traitData, robustY=robustY)
  geneTraitCor = as.data.frame(geneTraitCorA$bicor)
  geneTraitP   = as.data.frame(geneTraitCorA$p)
}

module = "blue" # module name of your interests.
pheno = "time" # phenotype of your interests.
modNames = substring(colnames(MEs_col), 3)
module_column = match(module, modNames)
pheno_column = match(pheno,colnames(traitData))
moduleGenes = moduleColors == module
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, module_column]),
                   abs(geneTraitCor[moduleGenes, pheno_column]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = paste("Gene significance for", pheno),
                   main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

The module and phenotype correlation

heatmap within certain module

module = "blue" # module name of your interests.
dat=dataExpr[,moduleColors==module]
n=t(scale(dat)) 
pheatmap(n, fontsize = 8, show_colnames = T, show_rownames = F,
                 cluster_cols = F, width = 8, height = 6, angle_col=45,
                 main = paste0("module_",module,"-gene heatmap"))

heatmap within certain module

Network extraction for cytoscape

module = "blue" # module name of your interests.
gene <- colnames(dataExpr) 
inModule <- moduleColors==module
modgene <- gene[inModule]
modTOM <- TOM[inModule,inModule]
dimnames(modTOM) <- list(modgene,modgene)
nTop = 100 # Top 100 genes
IMConn = softConnectivity(dataExpr[, modgene])
top = (rank(-IMConn) <= nTop) 
filter_modTOM <- modTOM[top, top]
cyt <- exportNetworkToCytoscape(filter_modTOM,
                                 edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
                                 nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
                                 weighted = TRUE,
                                 threshold = 0.15, 
                                 nodeNames = modgene[top], 
                                 nodeAttr = moduleColors[inModule][top])

Search for hub genes

# Connectivity calculation
ADJ <- abs(cor(dataExpr,use = "p"))^ power
Alldegrees <- intramodularConnectivity(ADJ, moduleColors)
write.csv(Alldegrees, file = "intramodularConnectivity.csv")

# Module membership and significance calculation
if (corType=="pearson") {
  geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
  MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
} else {
  geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
  geneModuleMembership = geneModuleMembershipA$bicor
  MMPvalue = geneModuleMembershipA$p
}
geneModuleMembership <- cbind(moduleColors, geneModuleMembership)
geneModuleMembership <- geneModuleMembership[order(geneModuleMembership$moduleColors),]
write.csv(geneModuleMembership, "geneModuleMembership.csv")
write.csv(MMPvalue, "MMPvalue.csv")

# Possible hub genes in certain module
module <- "grey"
gMM_module <- subset(geneModuleMembership, geneModuleMembership$moduleColors == module)
gMM_module$gene_name <- rownames(gMM_module)
gMM_module <- gMM_module[,c("gene_name",paste("ME",module, sep=""))]
colnames(gMM_module) <- c("gene_name","gMM")
MMP_module <- MMPvalue[gMM_module$gene_name,]
MMP_module$gene_name <- rownames(MMP_module)
MMP_module <- MMP_module[,c("gene_name",paste("ME",module, sep=""))]
colnames(MMP_module) <- c("gene_name", "MMP")
Alldegrees_module <- Alldegrees[gMM_module$gene_name,]
Alldegrees_module$gene_name <- rownames(Alldegrees_module)
result_module <- merge(gMM_module, MMP_module, by = "gene_name", sort = F)
result_module <- merge(result_module, Alldegrees_module, by = "gene_name", sort = F)
result_module <- result_module[order(-result_module$kWithin),]
write.csv(result_module, paste("result_module_", module, ".csv", sep = ""), row.names = F)
p_load(ggplot2, ggrepel)
result_module_sig <- result_module %>% top_n(5, -log10(MMP))
options(repr.plot.height = 4, repr.plot.width = 6)
ggplot(result_module[,c("MMP","kWithin")], aes(-log10(MMP), kWithin)) + 
  geom_point(alpha=0.8, size = 4, colour = module) + 
  xlab(expression("Module membership significance (-log"[10]*"P)")) + ylab(expression("Connectivity (kWithin)")) + 
  geom_label_repel(aes(label=gene_name), data=result_module_sig, box.padding = 0.8, colour = module) +
  annotate("text", x = 1, y = Inf, label = paste("ME",module,sep=""), vjust = 1.5, size = 5, colour = module) +
  theme_classic()

the possible hub genes in certain module

转录组测序技术和结果解读（十三）——WGCNA分析
WGCNA WGCNA(weighted correlation network analysis) 是一款基于R...
【科研猫】WGCNA共表达网络分析（1）
WGCNA（Weighted Gene Co-Expression Network Analysis）称为加权基因...
【R语言】WGCNA分析blockwiseModules函数报错
WGCNA（weighted gene co-expression network analysis），直译过来是...
WGCNA原理和分析流程
WGCNA全称weighted gene co-expression network analysis，即加权基因...
WGCNA
加权基因共表达网络分析 (WGCNA, Weighted correlation network analysis...
R语言安装WGCNA，碰到依赖包无法安装的情况
加权基因共表达网络分析 (WGCNA, Weighted correlation network analysis...
WGCNA 学习
WGCNA: weighted gene co-expression network analysis来生信学习班...
WGCNA分析 | 你的数据结果真的是准确的吗？？
加权基因共表达网络分析 (WGCNA, Weighted correlation network analysis...
就拿TCGA的乳腺癌RNA-seq数据来做个WGCNA示例吧！
WGCNA（Weighted Correlation Network analysis）是一个基于基因表达数据，构...
使用WGCNA进行meta-analysis
鉴于大量的转录组数据，可能出现的一个问题是“如果A组和B组都进行了转录组研究并报告了一些结果，这些结果的相容性如何...