美文网首页
2021-06-27 芯片数据limma差异分析

2021-06-27 芯片数据limma差异分析

作者: 学习生信的小兔子 | 来源:发表于2021-06-27 07:45 被阅读0次

    加载相应的包

    rm(list=ls())
    options(stringsAsFactors = FALSE)
    Sys.setenv("language"='en')
    #DGE for microarray by limma
    library(ggplot2)
    library(limma)
    setwd("D:/GEO数据挖掘与meta分析/练习/芯片数据limma差异分析(代码)/芯片数据limma差异分析(代码)") 
    

    导入数据

    rawexprSet=read.csv("exprs-19samples.csv",header=TRUE,row.names=1,check.names = FALSE)  
    #读取矩阵文件
    dim(rawexprSet)
    [1] 39426    19
    #exprSet=log2(rawexprSet)#log2转换
    exprSet=rawexprSet#如果已经是log2数据,运行这一步
    exprSet[1:5,1:5]
    GSM1386832 GSM1386833 GSM1386834  GSM1386835  GSM1386836
    ILMN_1343291  0.4605613 0.82011750 -0.8167353  0.13896560  0.09584236
    ILMN_1343295 -0.8032513 0.71279240 -1.6947494  1.23427820 -0.36104536
    ILMN_1651199  0.2524052 0.07226562  0.5513372 -0.10347176  0.22701597
    ILMN_1651209  0.2555184 0.06649685  0.3488650 -0.05129576  0.57850313
    ILMN_1651210  0.4912973 0.18652153  0.7219935 -0.07959080  0.45670270
    

    数据来源-GEO2R处理数据

    https://www.ncbi.nlm.nih.gov/geo/




    ## 画箱线图,比较数据分布情况
    par(mfrow=c(1,2))
    boxplot(data.frame(exprSet),col="blue") 
    ggsave("boxplot.pdf", width = 8, height = 5)
    dev.off()
    

    limma分析

    #读取样本分类信息
    group <- read.csv("datTraits.csv",header=TRUE,row.names=1,check.names = FALSE)
    table(group$treat_type)
    atherosclerotic         control 
                  9              10 
    design <- model.matrix(~0+factor(group$treat_type))#把group设置成一个model matrix#
    colnames(design)=levels(factor(group$treat_type))
    rownames(design)=colnames(exprSet)
    design
    
    fit <- lmFit(exprSet,design)##线性拟合
    cont.matrix<-makeContrasts(atherosclerotic-control,levels = design)
    fit2=contrasts.fit(fit,cont.matrix)##用对比模型进行差值计算
    fit2 <- eBayes(fit2)  ##贝叶斯检验
    ##eBayes() with trend=TRUE
    tempOutput = topTable(fit2,coef=1,n=Inf,adjust="BH",sort.by="B",resort.by="M") 
    nrDEG = na.omit(tempOutput)
    
    write.csv(nrDEG, "limmaOut.csv")
    
    group
    design

    筛选差异基因

    #筛选有显著差异的基因  adj.P.Val意思是矫正后P值
    foldChange=1.5 #fold change=1意思是差异是两倍
    pvalue =0.01 #0.05
    
    diff <- nrDEG
    diffSig = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange | diff$logFC<(-foldChange))),]
    dim(diffSig)
    [1] 523   6
    #write.table(diffSig, file="diffSig.xls",sep="\t",quote=F)
    write.csv(diffSig, "diffSig.csv")
    
    #把上调和下调分别输入up和down两个文件
    diffUp = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange)),]#foldchange>0是上调,foldchange<0是下调#
    dim(diffUp)
    [1] 20  6
    #write.table(diffUp, file="up.xls",sep="\t",quote=F)#把上调和下调分别输入up和down两个文件#
    write.csv(diffUp, "diffUp.csv")
    diffDown = diff[(diff$P.Val< pvalue & (diff$logFC<(-foldChange))),]
    dim(diffDown)
    [1] 503   6
    #write.table(diffDown, file="down.xls",sep="\t",quote=F)
    write.csv(diffDown, "diffDown.csv")
    

    相关文章

      网友评论

          本文标题:2021-06-27 芯片数据limma差异分析

          本文链接:https://www.haomeiwen.com/subject/olzayltx.html