美文网首页
WGCNA analysis

WGCNA analysis

作者: wkzhang81 | 来源:发表于2023-11-08 11:55 被阅读0次

    The official website for WGCNA package:

    GitHub - cran/WGCNA: :exclamation: This is a read-only mirror of the CRAN R package repository. WGCNA — Weighted Correlation Network Analysis. Homepage: http://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/

    Instructive descriptions on WGCNA in Chinese:

    加权基因共表达网络分析 (WGCNA)| 共表达网络模块构建 |生物信息学数据分析必备技能 (zhihu.com)
    WGCNA分析,简单全面的最新教程 - 简书 (jianshu.com)
    WGCNA是我最拿手的教程 | 生信菜鸟团 (bio-info-trainee.com)

    A jupyter-notebook-based WGCNA procedure:

    • Package loading
    p_load(tidyverse, WGCNA, repr, reshape2, pheatmap)
    options(repr.plot.res=150)
    options(stringsAsFactors = FALSE)
    enableWGCNAThreads()
    
    # Output
    Allowing parallel execution with up to 15 working processes.
    
    • Data loading
    exprMat <- "tpm_matrix_WGCNA.csv"
    type <- "unsigned"
      #"signed" could be chosen alternatively;
    corType <- "pearson"
      #"bicor" could be chosen alternatively;
    maxPOutliers <- ifelse(corType=="pearson",1,0.05)
    robustY <- ifelse(corType=="pearson",T,F)
    dataExpr <- read.csv(exprMat, row.names = 1)
    # dataExpr <- filter_all(dataExpr, any_vars(. > 1))
      # optional
      # filter the dataExpr could reduce the time and source required for the analysis
    dataExpr <- as.data.frame(t(dataExpr))
    dim(dataExpr)
    
    # Output
    nSample   nGene
    
    • Data filtering
    gsg = goodSamplesGenes(dataExpr, verbose = F)
    dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
    nGenes = ncol(dataExpr)
    nSamples = nrow(dataExpr)
    dim(dataExpr)
    
    # Output
    nSample   filtered_nGene
    
    • Soft threshold picking
    powers <- c(c(1:10), seq(from = 12, to=30, by=2))
    sft <- pickSoftThreshold(dataExpr, powerVector = powers, networkType=type, verbose=5)
    
    options(repr.plot.height=4, repr.plot.width=8)
    par(mfrow = c(1,2))
    cex1 <- 0.9
    plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
         xlab="Soft Threshold (power)",
         ylab="Scale Free Topology Model Fit,signed R^2",type="n",
         main = paste("Scale independence"))
    text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
         labels=powers,cex=cex1,col="red")
    # Selection standard: R-square=0.85
    abline(h=0.85,col="red")
    
    plot(sft$fitIndices[,1], sft$fitIndices[,5],
         xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
         main = paste("Mean connectivity"))
    text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, 
         cex=cex1, col="red")
    
    R-square table
    Scale independence and connectivity
    • Choose the right power (soft threshold) for the following analysis
    power <- sft$powerEstimate
    if (power > 6) { power <- 6 } else { power <- power }
    power
      # soft threshold should not exceed 6 for unsigned network and
        # 12 for signed network, according to the author of WGCNA.
    
    # Output
    3
      #In this example, power = 3 is already enough to make R-square > 0.85
    
    • Construction of TOM matrix
      Caution: this step is extremely time and resource consuming.
    net <- blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
                           TOMType = type, minModuleSize = 30,
                           reassignThreshold = 0, mergeCutHeight = 0.25,
                           numericLabels = TRUE, pamRespectsDendro = FALSE,
                           saveTOMs=TRUE, corType = corType, 
                           maxPOutliers=maxPOutliers, loadTOMs=TRUE,
                           saveTOMFileBase = paste0(exprMat, ".tom"),
                           verbose = 3)
      # the **TOM matrix** will be stored with the tpm_matrix file as "tpm_matrix_WGCNA.csv.tom-block.1.RData".
    net <- saveRDS("net.rds")
      # the constructed **net** file will be stored as "net.rds".
    ### Next time, these files could be directly imported into R environment for further analysis without re-calculation.
    ### net <- readRDS("net.red")
    
    • Cluster dendrogram
    # Convert labels to colors for plotting
    moduleLabels = net$colors
    moduleColors = labels2colors(moduleLabels)
    # Plot the dendrogram and the module colors underneath
    options(repr.plot.height = 6, repr.plot.width = 12)
    plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
                        "Module colors",
                        dendroLabels = FALSE, hang = 0.03,
                        addGuide = TRUE, guideHang = 0.05)
    table(moduleColors)
    
    table of module colors
    cluster dendrogram with module colors
    • Network heatmap plot for selected genes
    load(net$TOMFiles[1], verbose = F)
    TOM <- as.matrix(TOM)
    dissTOM <- 1-TOM
    nSelect <- 500 # randomly select 500 genes
    set.seed(666)
    select <- sample(nGenes, size = nSelect)
    selectTOM <- dissTOM[select, select]
    selectTree <- hclust(as.dist(selectTOM), method = "average")
    selectColors <- moduleColors[select]
    plotDiss <- selectTOM ^ 7
    diag(plotDiss) <- NA
    TOMplot(plotDiss, selectTree, selectColors, col = colorRampPalette(c("red","orange","lemonchiffon"))(256), main = "Network heatmap plot, selected genes")
    
    Network heatmap plot
    • module eigengene expression pattern
    MEs = net$MEs
    MEs_col = MEs
    colnames(MEs_col) = paste0("ME", labels2colors(
      as.numeric(str_replace_all(colnames(MEs),"ME",""))))
    MEs_col = orderMEs(MEs_col)
    options(repr.plot.height=6, repr.plot.width=8)
    pheatmap(t(MEs_col), border = F, 
             col = colorRampPalette(c("blue", "white", "red"))(256), 
             cluster_cols = F, gaps_col = c(3,6,9,12), angle_col =45, 
             scale = "row")
    table(moduleColors)
    
    Eigengene expression heatmap
    • Eigengene and phenotype adjacency heatmap
    options(repr.plot.res=150, repr.plot.height=8, repr.plot.width=8)
    # plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap", 
    #                        marDendro = c(3,3,2,4),
    #                        marHeatmap = c(3,4,2,2), plotDendrograms = T, 
    #                        xLabelsAngle = 90)
      # Eigengene adjacency without phenotype.
    
    trait <- "phenotype.csv"
    if(trait != "") {
      traitData <- read.csv(file=trait, row.names=1)
    MEs_colpheno = orderMEs(cbind(MEs_col, traitData))
    plotEigengeneNetworks(MEs_colpheno, "Eigengene adjacency heatmap", 
                          marDendro = c(3,3,2,4),
                          marHeatmap = c(3,4,2,2), plotDendrograms = T, 
                          xLabelsAngle = 90)
     # Eigengene adjacency with phenotype.
    
    Eigengene adjacency heatmap
    • Sample dendrogram and phenotype heatmap
    dataExpr_tree <- hclust(dist(dataExpr), method = "average")
    par(mar = c(0,5,2,0))
    
    #plot(dataExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
    #     cex.axis = 1, cex.main = 1,cex.lab = 1)
    # sample_colors <- numbers2colors(as.numeric(factor(datTraits$Tumor.Type)), 
    #                                colors = c("white","blue","red","green"),signed = FALSE)
      # for qualitative phenotypes.
    
    sample_colors <- numbers2colors(traitRows, signed = FALSE)
    par(mar = c(1,4,3,1),cex=0.8)
    plotDendroAndColors(dataExpr_tree, sample_colors,
                        groupLabels = colnames(sample),
                        cex.dendroLabels = 0.8,
                        marAll = c(1, 4, 3, 1),
                        cex.rowText = 0.01,
                        main = "Sample dendrogram and phenotype heatmap")
      # for quantitative phenotypes
    
    Sample dendrogram and phenotype heatmap
    • Module-phenotype relationships
    options(repr.plot.height = 12)
    if (corType=="pearson") {
      modTraitCor = cor(MEs_col, traitData, use = "p")
      modTraitP = corPvalueStudent(modTraitCor, nSamples)
    } else {
      modTraitCorP = bicorAndPvalue(MEs_col, traitData, robustY=robustY)
      modTraitCor = modTraitCorP$bicor
      modTraitP   = modTraitCorP$p
    }
    
    textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")
    dim(textMatrix) = dim(modTraitCor)
    labeledHeatmap(Matrix = modTraitCor, xLabels = colnames(traitData), 
                   yLabels = colnames(MEs_col), 
                   cex.lab = 0.5, 
                   ySymbols = colnames(MEs_col), colorLabels = FALSE, 
                   colors = blueWhiteRed(50), 
                   textMatrix = textMatrix, setStdMargins = FALSE, 
                   cex.text = 0.5, zlim = c(-1,1),
                   main = paste("Module-trait relationships"))
    
    Module-phenotype relationships
    • MM and GS score analysis on certain module
    if (corType=="pearson") {
      geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
      MMPvalue = as.data.frame(corPvalueStudent(
                 as.matrix(geneModuleMembership), nSamples))
    } else {
      geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
      geneModuleMembership = geneModuleMembershipA$bicor
      MMPvalue = geneModuleMembershipA$p
    }
    
    if (corType=="pearson") {
      geneTraitCor = as.data.frame(cor(dataExpr, traitData, use = "p"))
      geneTraitP = as.data.frame(corPvalueStudent(as.matrix(geneTraitCor), nSamples))
    } else {
      geneTraitCorA = bicorAndPvalue(dataExpr, traitData, robustY=robustY)
      geneTraitCor = as.data.frame(geneTraitCorA$bicor)
      geneTraitP   = as.data.frame(geneTraitCorA$p)
    }
    
    module = "blue" # module name of your interests.
    pheno = "time" # phenotype of your interests.
    modNames = substring(colnames(MEs_col), 3)
    module_column = match(module, modNames)
    pheno_column = match(pheno,colnames(traitData))
    moduleGenes = moduleColors == module
    par(mfrow = c(1,1))
    verboseScatterplot(abs(geneModuleMembership[moduleGenes, module_column]),
                       abs(geneTraitCor[moduleGenes, pheno_column]),
                       xlab = paste("Module Membership in", module, "module"),
                       ylab = paste("Gene significance for", pheno),
                       main = paste("Module membership vs. gene significance\n"),
                       cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
    
    The module and phenotype correlation
    • heatmap within certain module
    module = "blue" # module name of your interests.
    dat=dataExpr[,moduleColors==module]
    n=t(scale(dat)) 
    pheatmap(n, fontsize = 8, show_colnames = T, show_rownames = F,
                     cluster_cols = F, width = 8, height = 6, angle_col=45,
                     main = paste0("module_",module,"-gene heatmap"))
    
    heatmap within certain module
    • Network extraction for cytoscape
    module = "blue" # module name of your interests.
    gene <- colnames(dataExpr) 
    inModule <- moduleColors==module
    modgene <- gene[inModule]
    modTOM <- TOM[inModule,inModule]
    dimnames(modTOM) <- list(modgene,modgene)
    nTop = 100 # Top 100 genes
    IMConn = softConnectivity(dataExpr[, modgene])
    top = (rank(-IMConn) <= nTop) 
    filter_modTOM <- modTOM[top, top]
    cyt <- exportNetworkToCytoscape(filter_modTOM,
                                     edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
                                     nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
                                     weighted = TRUE,
                                     threshold = 0.15, 
                                     nodeNames = modgene[top], 
                                     nodeAttr = moduleColors[inModule][top])
    
    • Search for hub genes
    # Connectivity calculation
    ADJ <- abs(cor(dataExpr,use = "p"))^ power
    Alldegrees <- intramodularConnectivity(ADJ, moduleColors)
    write.csv(Alldegrees, file = "intramodularConnectivity.csv")
    
    # Module membership and significance calculation
    if (corType=="pearson") {
      geneModuleMembership = as.data.frame(cor(dataExpr, MEs_col, use = "p"))
      MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
    } else {
      geneModuleMembershipA = bicorAndPvalue(dataExpr, MEs_col, robustY=robustY)
      geneModuleMembership = geneModuleMembershipA$bicor
      MMPvalue = geneModuleMembershipA$p
    }
    geneModuleMembership <- cbind(moduleColors, geneModuleMembership)
    geneModuleMembership <- geneModuleMembership[order(geneModuleMembership$moduleColors),]
    write.csv(geneModuleMembership, "geneModuleMembership.csv")
    write.csv(MMPvalue, "MMPvalue.csv")
    
    # Possible hub genes in certain module
    module <- "grey"
    gMM_module <- subset(geneModuleMembership, geneModuleMembership$moduleColors == module)
    gMM_module$gene_name <- rownames(gMM_module)
    gMM_module <- gMM_module[,c("gene_name",paste("ME",module, sep=""))]
    colnames(gMM_module) <- c("gene_name","gMM")
    MMP_module <- MMPvalue[gMM_module$gene_name,]
    MMP_module$gene_name <- rownames(MMP_module)
    MMP_module <- MMP_module[,c("gene_name",paste("ME",module, sep=""))]
    colnames(MMP_module) <- c("gene_name", "MMP")
    Alldegrees_module <- Alldegrees[gMM_module$gene_name,]
    Alldegrees_module$gene_name <- rownames(Alldegrees_module)
    result_module <- merge(gMM_module, MMP_module, by = "gene_name", sort = F)
    result_module <- merge(result_module, Alldegrees_module, by = "gene_name", sort = F)
    result_module <- result_module[order(-result_module$kWithin),]
    write.csv(result_module, paste("result_module_", module, ".csv", sep = ""), row.names = F)
    p_load(ggplot2, ggrepel)
    result_module_sig <- result_module %>% top_n(5, -log10(MMP))
    options(repr.plot.height = 4, repr.plot.width = 6)
    ggplot(result_module[,c("MMP","kWithin")], aes(-log10(MMP), kWithin)) + 
      geom_point(alpha=0.8, size = 4, colour = module) + 
      xlab(expression("Module membership significance (-log"[10]*"P)")) + ylab(expression("Connectivity (kWithin)")) + 
      geom_label_repel(aes(label=gene_name), data=result_module_sig, box.padding = 0.8, colour = module) +
      annotate("text", x = 1, y = Inf, label = paste("ME",module,sep=""), vjust = 1.5, size = 5, colour = module) +
      theme_classic()
    
    the possible hub genes in certain module

    相关文章

      网友评论

          本文标题:WGCNA analysis

          本文链接:https://www.haomeiwen.com/subject/tsrqwdtx.html