美文网首页生物信息学与算法
GEO2R运行的代码及结果展示

GEO2R运行的代码及结果展示

作者: Stone_Stan4d | 来源:发表于2017-12-17 10:49 被阅读253次

    加载包,设置路径等:

    #   Differential expression analysis with limma
    source("https://bioconductor.org/biocLite.R")
    # biocLite()
    #biocLite("sva")
    getwd()
    setwd("D:/RWork/Rworking")
    library(Biobase)
    library(GEOquery)
    library(limma)
    
    # load series and platform data from GEO
    getwd()
    setwd("D:/RWork")
    dir.create("GEO2R")
    setwd("./GEO2R")
    

    下载数据,是表达矩阵:

    # load series and platform data from GEO
    getwd()
    setwd("D:/RWork")
    dir.create("GEO2R")
    setwd("./GEO2R")
    
    gset <- getGEO("GSE84846", GSEMatrix =TRUE, AnnotGPL=TRUE)
    
    if (length(gset) > 1) idx <- grep("GPL6480", attr(gset, "names")) else idx <- 1
    gset <- gset[[idx]]
    
    # make proper column names to match toptable 
    fvarLabels(gset) <- make.names(fvarLabels(gset))
    
    函数fvarLabels()
    分组和样本筛选,剔除标记为“X”的样本,不符合纳入标准:
    # group names for all samples
    gsms <- paste0("1011000000200X221X102211000002XX21X0200X1100202001",
                   "0XX21001X21X1100021XX21020220222211X1220212202X00")
    sml <- c()
    for (i in 1:nchar(gsms)) { sml[i] <- substr(gsms,i,i) }
    
    # eliminate samples marked as "X"
    sel <- which(sml != "X")
    sml <- sml[sel]
    gset <- gset[ ,sel]
    
    筛选前99个样本 筛选后85个样本

    对数转换:

    # log2 transform
    ex <- exprs(gset)
    qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
    LogC <- (qx[5] > 100) ||
      (qx[6]-qx[1] > 50 && qx[2] > 0) ||
      (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
    if (LogC) { ex[which(ex <= 0)] <- NaN
    exprs(gset) <- log2(ex) }
    
    ex是一个矩阵
    LogC的逻辑值为F
    根据ex的分位数的特征(包括99分位数是否大于100,极差是否大于50等等),判断LogC的值为,如此判断表达矩阵ex是一个已经过log2转换的矩阵,无需进行对数转换。

    下面进行的是差异分析:

    # set up the data and proceed with analysis
    sml <- paste("Node", sml, sep="")    # set group names
    fl <- as.factor(sml)
    gset$description <- fl
    design <- model.matrix(~ description + 0, gset)
    colnames(design) <- levels(fl)
    fit <- lmFit(gset, design)
    cont.matrix <- makeContrasts(G2-G0, G1-G0, G2-G1, levels=design)
    fit2 <- contrasts.fit(fit, cont.matrix)
    fit2 <- eBayes(fit2, 0.01)
    tT <- topTable(fit2, adjust="fdr", sort.by="B", number=10000)
    Node2vs0 <- topTable(fit2, adjust="fdr",coef = 1, sort.by="B", number=10000)
    Node1vs0 <- topTable(fit2, adjust="fdr",coef = 2, sort.by="B", number=10000)
    Node2vs1 <- topTable(fit2, adjust="fdr",coef = 3, sort.by="B", number=10000)
    
    tT <- subset(tT, select=c("ID","adj.P.Val","P.Value","F","Gene.symbol","Gene.title"))
    write.table(tT, file=stdout(), row.names=F, sep="\t")
    
    gset$description属性和design变量 design
    sml变量和fl变量
    gset$description属性和design变量 design
    fit fit2
    对比矩阵
    Bayes拟合后fit2 差异表达结果提取

    注意比较三个变量列名的不同。。。

    Node2vs0 <- topTable(fit2, adjust="BH",coef = 1, number=10000, p.value = 0.05)
    Node2vs0 <- Node2vs0[, c(1, 3, 23, 24, 25, 26, 27, 28)]
    head(Node2vs0)
    
    
    Node1vs0 <- topTable(fit2, adjust="fdr",coef = 2, number=10000, p.value = 0.05)
    #Node1vs0 <- Node1vs0[, c(1, 3, 23, 24, 25, 26, 27, 28)]
    head(Node1vs0)
    Node1vs0
    
    Node2vs1 <- topTable(fit2, adjust="fdr",coef = 3, number=10000, p.value = 0.05)
    #Node2vs1 <- Node2vs1[, c(1, 3, 23, 24, 25, 26, 27, 28)]
    head(Node2vs1)
    write.table(Node1vs0, file = "Node1vs0.txt", row.names=F, sep="\t")
    write.table(Node2vs0, file = "Node2vs0.txt", row.names=F, sep="\t")
    write.table(Node2vs1, file = "Node2vs1.txt", row.names=F, sep="\t")
    
    tT <- subset(tT, select=c("ID","adj.P.Val","P.Value","F","Gene.symbol","Gene.title"))
    write.table(tT, file=stdout(), row.names=F, sep="\t")
    
    部分结果

    出来的结果表明,I期和0期之间,差异基因不明显,表明林场分期对其混杂影响很大,需要优化分组。

    相关文章

      网友评论

      • 鱼笨:您好,请问一下“gsms <- paste0("1011000000200X221X102211000002XX21X0200X1100202001",
        "0XX21001X21X1100021XX21020220222211X1220212202X00")”这个分组字符串是怎么得到的呢?谢谢!
        Stone_Stan4d:这个是在GEO2R里面自己设置后,在代码脚本里面可以找到。 你也可以自己输入。

      本文标题:GEO2R运行的代码及结果展示

      本文链接:https://www.haomeiwen.com/subject/tnkywxtx.html