输入数据为read counts数据,而不是normalize之后的FPKM/TPM
输入文件:第一列为基因名,2-4列为control 组3个重复,5-7列为infected组3个重复
#载入数据
mycounts<-read.csv("gene_readcount.csv")
head(mycounts)
rownames(mycounts)<-mycounts[,1]
mycounts<-mycounts[,-1]
head(mycounts)
#载入edgeR
library(edgeR)
#分组
group =c(rep("control",3),rep("infected",3))
counts <- mycounts[,1:6]
counts
#差异分析
y <- DGEList(counts=counts, group = group)
y
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
et <- exactTest(y,pair = c("control","infectd"))
topTags(et)
ordered_tags <- topTags(et, n=100000)
ordered_tags
allDiff=ordered_tags$table
allDiff=allDiff[is.na(allDiff$FDR)==FALSE,]
diff=allDiff
newData=y$pseudo.counts
#导出数据
write.table(diff,file="controlVSinfected.xls",sep="\t",quote=F)
#筛选显著差异数据 (FDR < 0.05且|foldchange|>1.5, logFC即为log2foldchangge)
diffSig = diff[(diff$FDR < 0.05 & (diff$logFC>0.585 | diff$logFC<(-0.585))),]
write.table(diffSig,file="controlVSinfected_Sig.xls",sep="\t",quote=F)
网友评论