The official website for WGCNA package:
Instructive descriptions on WGCNA in Chinese:
加权基因共表达网络分析 (WGCNA)| 共表达网络模块构建 |生物信息学数据分析必备技能 (zhihu.com)
WGCNA分析,简单全面的最新教程 - 简书 (jianshu.com)
WGCNA是我最拿手的教程 | 生信菜鸟团 (bio-info-trainee.com)
A jupyter-notebook-based WGCNA procedure:
- Package loading
p_load(tidyverse, WGCNA, repr, reshape2, pheatmap)
options(repr.plot.res=150)
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
# Output
Allowing parallel execution with up to 15 working processes.
- Data loading
exprMat <- "tpm_matrix_WGCNA.csv"
type <- "unsigned"
#"signed" could be chosen alternatively;
corType <- "pearson"
#"bicor" could be chosen alternatively;
maxPOutliers <- ifelse(corType=="pearson",1,0.05)
robustY <- ifelse(corType=="pearson",T,F)
dataExpr <- read.csv(exprMat, row.names = 1)
# dataExpr <- filter_all(dataExpr, any_vars(. > 1))
# optional
# filter the dataExpr could reduce the time and source required for the analysis
dataExpr <- as.data.frame(t(dataExpr))
dim(dataExpr)
# Output
nSample nGene
- Data filtering
gsg = goodSamplesGenes(dataExpr, verbose = F)
dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
nGenes = ncol(dataExpr)
nSamples = nrow(dataExpr)
dim(dataExpr)
# Output
nSample filtered_nGene
- Soft threshold picking
powers <- c(c(1:10), seq(from = 12, to=30, by=2))
sft <- pickSoftThreshold(dataExpr, powerVector = powers, networkType=type, verbose=5)
options(repr.plot.height=4, repr.plot.width=8)
par(mfrow = c(1,2))
cex1 <- 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",
ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
# Selection standard: R-square=0.85
abline(h=0.85,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
cex=cex1, col="red")
R-square table
Scale independence and connectivity
- Choose the right power (soft threshold) for the following analysis
power <- sft$powerEstimate
if (power > 6) { power <- 6 } else { power <- power }
power
# soft threshold should not exceed 6 for unsigned network and
# 12 for signed network, according to the author of WGCNA.
# Output
3
#In this example, power = 3 is already enough to make R-square > 0.85
- Construction of TOM matrix
Caution: this step is extremely time and resource consuming.
net <- blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
TOMType = type, minModuleSize = 30,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs=TRUE, corType = corType,
maxPOutliers=maxPOutliers, loadTOMs=TRUE,
saveTOMFileBase = paste0(exprMat, ".tom"),
verbose = 3)
# the **TOM matrix** will be stored with the tpm_matrix file as "tpm_matrix_WGCNA.csv.tom-block.1.RData".
net <- saveRDS("net.rds")
# the constructed **net** file will be stored as "net.rds".
### Next time, these files could be directly imported into R environment for further analysis without re-calculation.
### net <- readRDS("net.red")
- Cluster dendrogram
# Convert labels to colors for plotting
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
# Plot the dendrogram and the module colors underneath
options(repr.plot.height = 6, repr.plot.width = 12)
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
table(moduleColors)
table of module colors
cluster dendrogram with module colors
- Network heatmap plot for selected genes
load(net$TOMFiles[1], verbose = F)
TOM <- as.matrix(TOM)
dissTOM <- 1-TOM
nSelect <- 500 # randomly select 500 genes
set.seed(666)
select <- sample(nGenes, size = nSelect)
selectTOM <- dissTOM[select, select]
selectTree <- hclust(as.dist(selectTOM), method = "average")
selectColors <- moduleColors[select]
plotDiss <- selectTOM ^ 7
diag(plotDiss) <- NA
TOMplot(plotDiss, selectTree, selectColors, col = colorRampPalette(c("red","orange","lemonchiffon"))(256), main = "Network heatmap plot, selected genes")
Network heatmap plot
- module eigengene expression pattern
MEs = net$MEs
MEs_col = MEs
colnames(MEs_col) = paste0("ME", labels2colors(
as.numeric(str_replace_all(colnames(MEs),"ME",""))))
MEs_col = orderMEs(MEs_col)
options(repr.plot.height=6, repr.plot.width=8)
pheatmap(t(MEs_col), border = F,
col = colorRampPalette(c("blue", "white", "red"))(256),
cluster_cols = F, gaps_col = c(3,6,9,12), angle_col =45,
scale = "row")
table(moduleColors)
Eigengene expression heatmap
- Eigengene and phenotype adjacency heatmap
options(repr.plot.res=150, repr.plot.height=8, repr.plot.width=8)
# plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
# marDendro = c(3,3,2,4),
# marHeatmap = c(3,4,2,2), plotDendrograms = T,
# xLabelsAngle = 90)
# Eigengene adjacency without phenotype.
trait <- "phenotype.csv"
if(trait != "") {
traitData <- read.csv(file=trait, row.names=1)
MEs_colpheno = orderMEs(cbind(MEs_col, traitData))
plotEigengeneNetworks(MEs_colpheno, "Eigengene adjacency heatmap",
marDendro = c(3,3,2,4),
marHeatmap = c(3,4,2,2), plotDendrograms = T,
xLabelsAngle = 90)
# Eigengene adjacency with phenotype.
Eigengene adjacency heatmap
- Sample dendrogram and phenotype heatmap
dataExpr_tree <- hclust(dist(dataExpr), method = "average")
par(mar = c(0,5,2,0))
#plot(dataExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2,
# cex.axis = 1, cex.main = 1,cex.lab = 1)
# sample_colors <- numbers2colors(as.numeric(factor(datTraits$Tumor.Type)),
# colors = c("white","blue","red","green"),signed = FALSE)
# for qualitative phenotypes.
sample_colors <- numbers2colors(traitRows, signed = FALSE)
par(mar = c(1,4,3,1),cex=0.8)
plotDendroAndColors(dataExpr_tree, sample_colors,
groupLabels = colnames(sample),
cex.dendroLabels = 0.8,
marAll = c(1, 4, 3, 1),
cex.rowText = 0.01,
main = "Sample dendrogram and phenotype heatmap")
# for quantitative phenotypes
Sample dendrogram and phenotype heatmap
- Module-phenotype relationships
options(repr.plot.height = 12)
if (corType=="pearson") {
modTraitCor = cor(MEs_col, traitData, use = "p")
modTraitP = corPvalueStudent(modTraitCor, nSamples)
} else {
modTraitCorP = bicorAndPvalue(MEs_col, traitData, robustY=robustY)
modTraitCor = modTraitCorP$bicor
modTraitP = modTraitCorP$p
}
textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")
dim(textMatrix) = dim(modTraitCor)
labeledHeatmap(Matrix = modTraitCor, xLabels = colnames(traitData),
yLabels = colnames(MEs_col),
cex.lab = 0.5,
ySymbols = colnames(MEs_col), colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix, setStdMargins = FALSE,
cex.text = 0.5, zlim = c(-1,1),
main = paste("Module-trait relationships"))
Module-phenotype relationships
- MM and GS score analysis on certain module
if (corType=="pearson") {
geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(
as.matrix(geneModuleMembership), nSamples))
} else {
geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
geneModuleMembership = geneModuleMembershipA$bicor
MMPvalue = geneModuleMembershipA$p
}
if (corType=="pearson") {
geneTraitCor = as.data.frame(cor(dataExpr, traitData, use = "p"))
geneTraitP = as.data.frame(corPvalueStudent(as.matrix(geneTraitCor), nSamples))
} else {
geneTraitCorA = bicorAndPvalue(dataExpr, traitData, robustY=robustY)
geneTraitCor = as.data.frame(geneTraitCorA$bicor)
geneTraitP = as.data.frame(geneTraitCorA$p)
}
module = "blue" # module name of your interests.
pheno = "time" # phenotype of your interests.
modNames = substring(colnames(MEs_col), 3)
module_column = match(module, modNames)
pheno_column = match(pheno,colnames(traitData))
moduleGenes = moduleColors == module
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, module_column]),
abs(geneTraitCor[moduleGenes, pheno_column]),
xlab = paste("Module Membership in", module, "module"),
ylab = paste("Gene significance for", pheno),
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
The module and phenotype correlation
- heatmap within certain module
module = "blue" # module name of your interests.
dat=dataExpr[,moduleColors==module]
n=t(scale(dat))
pheatmap(n, fontsize = 8, show_colnames = T, show_rownames = F,
cluster_cols = F, width = 8, height = 6, angle_col=45,
main = paste0("module_",module,"-gene heatmap"))
heatmap within certain module
- Network extraction for cytoscape
module = "blue" # module name of your interests.
gene <- colnames(dataExpr)
inModule <- moduleColors==module
modgene <- gene[inModule]
modTOM <- TOM[inModule,inModule]
dimnames(modTOM) <- list(modgene,modgene)
nTop = 100 # Top 100 genes
IMConn = softConnectivity(dataExpr[, modgene])
top = (rank(-IMConn) <= nTop)
filter_modTOM <- modTOM[top, top]
cyt <- exportNetworkToCytoscape(filter_modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.15,
nodeNames = modgene[top],
nodeAttr = moduleColors[inModule][top])
- Search for hub genes
# Connectivity calculation
ADJ <- abs(cor(dataExpr,use = "p"))^ power
Alldegrees <- intramodularConnectivity(ADJ, moduleColors)
write.csv(Alldegrees, file = "intramodularConnectivity.csv")
# Module membership and significance calculation
if (corType=="pearson") {
geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
} else {
geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
geneModuleMembership = geneModuleMembershipA$bicor
MMPvalue = geneModuleMembershipA$p
}
geneModuleMembership <- cbind(moduleColors, geneModuleMembership)
geneModuleMembership <- geneModuleMembership[order(geneModuleMembership$moduleColors),]
write.csv(geneModuleMembership, "geneModuleMembership.csv")
write.csv(MMPvalue, "MMPvalue.csv")
# Possible hub genes in certain module
module <- "grey"
gMM_module <- subset(geneModuleMembership, geneModuleMembership$moduleColors == module)
gMM_module$gene_name <- rownames(gMM_module)
gMM_module <- gMM_module[,c("gene_name",paste("ME",module, sep=""))]
colnames(gMM_module) <- c("gene_name","gMM")
MMP_module <- MMPvalue[gMM_module$gene_name,]
MMP_module$gene_name <- rownames(MMP_module)
MMP_module <- MMP_module[,c("gene_name",paste("ME",module, sep=""))]
colnames(MMP_module) <- c("gene_name", "MMP")
Alldegrees_module <- Alldegrees[gMM_module$gene_name,]
Alldegrees_module$gene_name <- rownames(Alldegrees_module)
result_module <- merge(gMM_module, MMP_module, by = "gene_name", sort = F)
result_module <- merge(result_module, Alldegrees_module, by = "gene_name", sort = F)
result_module <- result_module[order(-result_module$kWithin),]
write.csv(result_module, paste("result_module_", module, ".csv", sep = ""), row.names = F)
p_load(ggplot2, ggrepel)
result_module_sig <- result_module %>% top_n(5, -log10(MMP))
options(repr.plot.height = 4, repr.plot.width = 6)
ggplot(result_module[,c("MMP","kWithin")], aes(-log10(MMP), kWithin)) +
geom_point(alpha=0.8, size = 4, colour = module) +
xlab(expression("Module membership significance (-log"[10]*"P)")) + ylab(expression("Connectivity (kWithin)")) +
geom_label_repel(aes(label=gene_name), data=result_module_sig, box.padding = 0.8, colour = module) +
annotate("text", x = 1, y = Inf, label = paste("ME",module,sep=""), vjust = 1.5, size = 5, colour = module) +
theme_classic()
the possible hub genes in certain module
网友评论