WGCNA

作者: MissL_78e0 | 来源:发表于2019-07-25 20:12 被阅读0次

    WGCNA(加权基因共表达网络分析) - 简书

    WGCNA分析,简单全面的最新教程 - 简书

    http://pages.stat.wisc.edu/~yandell/statgen/ucla/WGCNA/wgcna.html

    STEP6:WGCNA相关性分析 - 简书

    学习WGCNA总结 | Public Library of Bioinformatics


    模块数,54个模块,0为不包括在模块内的基因
    模块对应的颜色及基因数

    rm(list = ls())#清空环境变量

    getwd()

    setwd("D:/AAA-台式机-LY-WorkSpace/data-wrangling-with-R/")

    library(WGCNA)

    library(reshape2)

    library(stringr)

    options(stringsAsFactors = F)

    #打开多路线程

    allowWGCNAThreads()#enableWGCNAThreads()

    mycount <- read.csv("D:/AAA-台式机-LY-WorkSpace/data-wrangling-with-R/htseq-count-readcount - 无注释.csv", row.names=1, header=T, check.names=F)

    dim(mycount)

    data <- log2(mycount+1)

    ##筛选方差前25%的基因,上一步是为了减少运算量,因为一个测序数据可能会有好几万个探针,

    #而可能其中很多基因在各个样本中的表达情况并没有什么太大变化,为了减少运算量,这里我们筛选方差前25%的基因。

    m.vars=apply(data,1,var)

    data.upper <- data[which(m.vars > quantile(m.vars, probs = seq(0,1,0.25))[4]),]

    datExpr <- as.data.frame(t(data.upper))#行和列转换位置

    nGenes <- ncol(datExpr)

    nSamples <- nrow(datExpr)

    ##样本聚类检查离群值##

    gsg <- goodSamplesGenes(datExpr, verbose = 3)

    gsg$allOK

    sampleTree <- hclust(dist(datExpr), method = "average")

    plot(sampleTree, main = "sample clustering to detect outliers", sub = "", xlab = "")

    save(datExpr, file = "readcount-01-dataInput.RData")

    ##软阈值筛选##

    # 横轴是Soft threshold (power),纵轴是无标度网络的评估参数,数值越高,

    # 网络越符合无标度特征 (non-scale)

    powers <- c(seq(1,10,by=1), seq(12,20,by=2))

    sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

    par(mfrow=c(1,2))

    cex1 = 0.9

    plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

        xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit,signed R^2", type = "n",

        main = paste("scale independence"))

    text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

        labels = powers, cex = cex1, col = "red")

    abline(h=0.90, col="red")

    plot(sft$fitIndices[,1], sft$fitIndices[,5],

        xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",

        main = paste("Mean Connectivity"))

    text(sft$fitIndices[,1], sft$fitIndices[,5], labels = powers, cex = cex1, col = "red")

    # 运行下列代码,让程序推荐你一个power, 数据质量太差啦,程序给了我"NA",自己设定power=14

    sft$powerEstimate

    #推荐的power是4

    ##一步法网络构建:One-step network construction and module detection##

    net <- blockwiseModules(datExpr,power = 4,maxBlockSize = nGenes,

                            TOMType = "unsigned", minModuleSize = 30,

                            reassignThreshold = 0, mergeCutHeight = 0.25,

                            numericLabels = T, pamRespectsDendro = F,

                            saveTOM = T,

                            saveTOMFileBase = "AS-green-readcount-TOM",

                            verbose = 3)

    table(net$colors)

    #层级聚类树展示各个模块

    ## 灰色的为**未分类**到模块的基因。

    # Convert labels to colors for plotting

    moduleLabels = net$colors

    moduleColors = labels2colors(moduleLabels)

    # Plot the dendrogram and the module colors underneath

    # 如果对结果不满意,还可以recutBlockwiseTrees,节省计算时间

    plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],

                        "Module colors",

                        dendroLabels = F, hang = 0.03,

                        addGuide = T, guideHang = 0.05)

    table(moduleColors)

    MEs = net$MEs

    geneTree = net$dendrograms[[1]]

    save(MEs, moduleLabels, geneTree,

        file = "AS-green-readcount-02-networkConstruction-auto.RData")##结果保存###

    ##表型与模块相关性##(没有,下一步)

    moduleLabelsAutomatic = net$colors

    moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)

    moduleColorsWW = moduleColorsAutomatic

    MEs0 = moduleEigengenes(datExpr, moduleColorsWW)$eigengenes

    MEsWW = orderMEs(MEs0)

    samples <- read.csv()

    modTraitor = cor(MEsWW, )

    #绘制模块之间相关性图

    # module eigengene, 可以绘制线图,作为每个模块的基因表达趋势的展示

    MEs = net$MEs

    ### 不需要重新计算,改下列名字就好

    ### 官方教程是重新计算的,其实可以不用这么麻烦

    MEs_col = MEs

    colnames(MEs_col) = paste0("ME", labels2colors(as.numeric(str_replace_all(colnames(MEs),"ME", ""))))

    MEs_col = orderMEs(MEs_col)

    # 根据基因间表达量进行聚类所得到的各模块间的相关性图

    # marDendro/marHeatmap 设置下、左、上、右的边距

    plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",

                          marDendro = c(3,3,2,4),

                          marHeatmap = c(3,4,2,2), plotDendrograms = T,

                          xLabelsAngle = 90)

    #可视化基因网络 (TOM plot)

    # 如果采用分步计算,或设置的blocksize>=总基因数,直接load计算好的TOM结果

    # 否则需要再计算一遍,比较耗费时间

    # TOM = TOMsimilarityFromExpr(dataExpr, power=power, corType=corType, networkType=type)

    load(net$TOMFiles[1], verbose = T)

    TOM <- as.matrix(TOM)

    dissTOM = 1-TOM

    # Transform dissTOM with a power to make moderately strong

    # connections more visible in the heatmap

    plotTOM = dissTOM^7

    # Set diagonal to NA for a nicer plot

    diag(plotTOM) = NA

    #call the plot function

    # 这一部分特别耗时,行列同时做层级聚类

    #TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

    #all genes 耗时太长了,随便选取1000个基因来可视化

    nSelect = 1000

    # For reproducibility, we set the random seed

    set.seed(10)

    select = sample(nGenes, size = nSelect)

    selectTOM = dissTOM[select, select]

    # There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster

    selectTree = hclust(as.dist(selectTOM), method = "average")

    selectColors = moduleColors[select]

    # Open a graphical window

    sizeGrWindow(9,9)

    # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing

    # the color palette; setting the diagonal to NA also improves the clarity of the plot

    plotDiss = selectTOM^7;

    diag(plotDiss) = NA;

    TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")

    相关文章

      网友评论

        本文标题:WGCNA

        本文链接:https://www.haomeiwen.com/subject/pkpkrctx.html