美文网首页
转录组专题:WGCNA

转录组专题:WGCNA

作者: 挽山 | 来源:发表于2020-02-26 22:16 被阅读0次

    要注意的就一个是软阈值那块几个参数。

    #source("http://bioconductor.org/biocLite.R")
    #biocLite(c("AnnotationDbi","impute","GO.db","preprocessCore"))
    #install.packages("WGCNA")
    library(WGCNA)
    
    #  数据准备
    fpkm<-read.table(file.choose(), sep = '\t', header = T, stringsAsFactors = F) #file=归一化后的表达矩阵,行为基因,列为样本
    
    #  查看数据信息
    head(fpkm)
    dim(fpkm)
    names(fpkm)
    
    #  设置行名(第一列作为行名)
    row.names(fpkm)<-fpkm[,1]
    fpkm<-fpkm[,-1]
    
    #  WGCNA要求转置数据格式
    fpkm_t<-t(fpkm)
    
    gsg = goodSamplesGenes(fpkm_t, verbose = 3)
    gsg$allOK
    if (!gsg$allOK)
    {
      # Optionally, print the gene and sample names that were removed:
      if (sum(!gsg$goodGenes)>0)
        printFlush(paste("Removed genes:", paste(names(fpkm_t)[!gsg$goodGenes], collapse = ", ")));
      if (sum(!gsg$goodSamples)>0)
        printFlush(paste("Removed samples:", paste(rownames(fpkm_t)[!gsg$goodSamples], collapse = ", ")));
      # Remove the offending genes and samples from the data:
      fpkm_t = fpkm_t[gsg$goodSamples, gsg$goodGenes]
    }
    gsg$goodSamples
    
    sampleTree = hclust(dist(fpkm_t), method = "average");
    # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
    # The user should change the dimensions if the window is too large or too small.
    sizeGrWindow(12,9)
    #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
    par(cex = 0.6);
    par(mar = c(0,4,2,0))
    plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
    # Plot a line to show the cut
    abline(h = 8000, col = "red");
    # Determine cluster under the line
    # 保存图片 Sample clustering to detect outliers
    
    clust = cutreeStatic(sampleTree, cutHeight = 8000, minSize = 10) ## 把高于8000的切除
    table(clust) # 0代表切除的,1代表保留的
    keepSamples = (clust==1)
    datExpr = fpkm_t[keepSamples, ]
    nGenes = ncol(datExpr)
    nSamples = nrow(datExpr)
    
    # Choose a set of soft-thresholding powers 决定模块内基因数量
    powers = c(c(1:10), seq(from = 12, to=20, by=2))
    sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
    sizeGrWindow(9, 5)
    par(mfrow = c(1,2));
    cex1 = 0.9;
    plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
         xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
         main = paste("Scale independence"));
    text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
         labels=powers,cex=cex1,col="red");
    abline(h=0.85,col="red")
    abline(h=0.9,col="red")
    plot(sft$fitIndices[,1], sft$fitIndices[,5],
         xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
         main = paste("Mean connectivity"))
    text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
    # 截图,命名为power.png
    # 保存图片Scale independence & Mean connectivity
    sft
    sft$powerEstimate # 最低要选sft > 0.85的,红线划到0.9
    
    ### 挖模块(一步法)-----------------------------------------------------------------
    net =blockwiseModules(datExpr, power = 12, #选择软阈值
                          maxBlockSize = 5000,
                          TOMType = "unsigned", 
                          minModuleSize = 20, 
                          reassignThreshold = 0, 
                          mergeCutHeight = 0.15, # 计算完所有modules后将特征量高度相似的modules进行和并,标准:所有<mergeCutHeight的值,默认0.15,决定模块数量 
                          numericLabels = TRUE, 
                          pamRespectsDendro = FALSE,
                          saveTOMs = TRUE,
                          saveTOMFileBase = "PFC_PPI_network",# 改
                          verbose = 3)
    MEs = net$MEs # 模块特征值:一个关于modules的特征量矩阵,行数等于筛选的modules数,列数等于样本数
    table(net$colors)
    length(net$colors) 
    length(table(net$colors)) 
    table(labels2colors(net$colors))
    #---------------------------------------------------------------------------------------------
    #导出模块表格
    #source("https://bioconductor.org/biocLite.R")
    #biocLite("marray")
    library(marray) # write.list要用
    write.list(net,"7.3-CC-10-0.15_net.txt")
    write.table(datExpr,"7.3-CC-10-0.15_res.txt",quote=FALSE,sep="\t")
    
    #处理去重后得到模块号和颜色对应的文件,用于匹配result和net的结果
    write.table(net$colors, "7.1-CC-net$colors-10-0.15.txt",row.names = F,quote = F,sep="\t")
    write.table(labels2colors(net$colors), "7.1-CC-labels2colors-10-0.15.txt",row.names = F,quote = F,sep="\t")
    
    #画聚类树(判断模块质量)---------------------------------------------------------------
    geneTree = net$dendrograms[[1]]
    # open a graphics window
    sizeGrWindow(12, 9)
    # Convert labels to colors for plotting
    mergedColors = labels2colors(net$colors)
    # Plot the dendrogram and the module colors underneath
    tree_52<-plotDendroAndColors(geneTree, mergedColors[net$blockGenes[[1]]], # geneTree = net$dendrograms[[1]]
                        "Module colors",
                        dendroLabels = FALSE, hang = 0.03,
                        addGuide = TRUE, guideHang = 0.05,
                        main = 'Cluster Dendrogram For CC set')
    # 保存图片 Cluster Dendrogram
    
    #画热图----------------------------------------------------------------------------------
    moduleColors = labels2colors(net$colors) 
    
    TOM = TOMsimilarityFromExpr(datExpr, power = 10)#####
    TOM0 = 1-TOM; # 生成全基因不相似TOM矩阵,比如1-(),把得到的不相关矩阵加幂,这样绘制的TOM图色彩差异会比较明显。
    plotTOM = TOM0^7;
    
    TOMplot<-TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot")
    # 保存图片 Network heatmap plot
    #cex.lab可以更改X轴Y轴label字体的大小,cex.text可以更改热图中字体的大小,colors可以改变颜色。
    
    #导出到Cytoscape ---------------------------------------------------------
    # Select modules需要修改,选择需要导出的模块颜色
    modules = unique(mergedColors)
    # modules =c("blue","yellow","red")
    
    mergedColors = labels2colors(net$colors)
    # Select module probes选择模块探测
    probes = colnames(datExpr)
    inModule = is.finite(match(mergedColors, modules));
    modProbes = probes[inModule];
    #modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
    # Select the corresponding Topological Overlap
    modTOM = TOM[inModule, inModule];
    dimnames(modTOM) = list(modProbes, modProbes)
    
    # Export the network into edge and node list files Cytoscape can read
    cyt = exportNetworkToCytoscape(modTOM,
                                   edgeFile = paste("7.2_CC_10_0.15_edges_cytoscape",".txt", sep=""),
                                   nodeFile = paste("7.2_CC_10_0.15_nodes_cytoscape",".txt", sep=""),
                                   weighted = TRUE,
                                   threshold = 0.02,#默认
                                   nodeNames = modProbes,
                                   altNodeNames = modProbes,
                                   nodeAttr = mergedColors[inModule])
    
    #hub基因的识别 方法一------------------------------------------------------------
    #hub gene(使用3个标准来筛选枢纽基因:基因与指定模块显著性 > 0.2, greenyellow Module membership value > 0.8, q.weighted(性状关联) < 0.01)
    #1. connectivity
    ADJ1=abs(cor(datExpr,use="p"))^6
    Alldegrees1=intramodularConnectivity(ADJ1, net$colors=='blue')# colorh1代表module的颜色
    head(Alldegrees1) #KTotal代表该基因的所有边的连接度,KWithin代表和该基因位于同一个module下的边的连接度,KOut代表和该基因位于不同module下的边的连接度,所以KTotal是KWithin和KOut之和,KDiff代表KWithin和KOut的差值。在module中,会存在hub gene的概念,所谓的hub gene, 就是该module下连接度最大的基因,注意此时只考虑位于该module下的边,就是上文的KWithin。
    #2. module member-ship(MM)
    datKME = signedKME(datExpr,MEs,outputColumnName="MM.")
    head(datKME)
    #3. gene signigicancer(GS) 与表型数据相关性
    GS1=as.numeric(cor(y,datExpr, use="p"))
    
    #方法二------------------------------------------------------------------------------------
    ##直接筛选某个模块的hub gene
    #FilterGenes<- abs(GS1)> .2 & abs(datKME$MM.brown)>.8
    datKME = signedKME(datExpr,MEs,outputColumnName="MM.")
    head(datKME)
    
    FilterGenes<- abs(datKME$MM.2)>.9 #根据需要选择模块号
    
    FilterGenes_res<-colnames(fpkm_t)[FilterGenes];length(FilterGenes_res)
    write(paste('Module', 'Gene'),'10-FilterGenes_hub_0.8.txt',sep = '\t',append=T)
    write(paste('CC_M2(blue)', FilterGenes_res),'10-FilterGenes_hub_0.9.txt',sep = '\t',append=T)
    
    # hub 基因热图 -----------------------------------------------------------------------------
    plotNetworkHeatmap(fpkm_t, 
                       plotGenes = paste(FilterGenes_res,sep = ""), 
                       networkType = "unsigned", 
                       useTOM = TRUE, 
                       power=10,
                       main="hub gene |KME| > 0.95 "
                      )
    
    # 导出hub基因到Cytoscape ---------------------------------------------------------------
    TOM = TOMsimilarityFromExpr(fpkm_t, power = 10)#####
    hubGene_TOM <- TOM[FilterGenes,FilterGenes] 
    dimnames(hubGene_TOM) = list(colnames(fpkm_t)[FilterGenes], 
                                 colnames(fpkm_t)[FilterGenes]) 
    cyt = exportNetworkToCytoscape(hubGene_TOM, 
                                   edgeFile = paste("Cytoscape_hub_edges", ".txt", sep=""), 
                                   #nodeFile = paste("Cytoscape_hub_nodes", ".txt", sep=""), 
                                   weighted = TRUE, 
                                   threshold = 0.02, 
                                   nodeNames = FilterGenes_res, 
                                   altNodeNames = F, 
                                   nodeAttr = F )
    
    #其他方法---------------------------------------------------------------------------------
    #hub gene,只给一个
    moduleColors = unique(mergedColors)
    HubGenes <- chooseTopHubInEachModule(datExpr,moduleColors)
    write.table (HubGenes,file = "HubGenes_of_each_module.txt",quote=F,sep='\t')
    

    相关文章

      网友评论

          本文标题:转录组专题:WGCNA

          本文链接:https://www.haomeiwen.com/subject/ncqyqhtx.html