library(edgeR)
library(limma)
#library(airway)
counts <- read.table("conw.xls", header=T, sep="\t", row.names=1, comment.char="",check.names=F)
groups <- factor(c(1,2))
y <- DGEList(counts=counts,group=groups)
data.frame(Sample=colnames(y),groups)
design <- model.matrix(~groups)
y <- calcNormFactors(y)
keep.exprs<- edgeR::filterByExpr(y,group=groups) #过滤
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y) # 标准化
y_bcv <- y # 差异表达分析
bcv <- 0.4
et <- exactTest(y_bcv, dispersion = bcv ^ 2)
gene1 <- decideTestsDGE(et, p.value = 0.05, lfc = 0)
summary(gene1)
# summary(dt <- decideTestsDGE(et, p.value = 0.05, lfc = 0))
write.table(dt,'t2.xls',sep = '\t')
# awk -F "\t" '{if($2==1)print $0}' t2.xls | les # up gene
# awk -F "\t" '{if($2==-1)print $0}' t2.xls | les # down gene
y_bcv <- y
bcv <- 0.2
et2 <- exactTest(y_bcv, dispersion = bcv ^ 2)
gene2 <- decideTestsDGE(et2, p.value = 0.05, lfc = 0)
summary(gene2)
# 绘制火山图
df <- et2$table
png('t1.png',width=1000,height=1200,res=600) # 绘制火山图
ggplot(df, aes(x=logFC, y=-log10(PValue)) ) +geom_point() +ylab("-log10(p value)") +theme_bw()
print(p)
dev.off()
nosig <- names(gene1@.Data[gene1@.Data == 0,])
gene_name <- sample(nosig,500)
y1 <- y # 对象拷贝
y1$samples$group <- 1
# y1来自上面无重复差异表达代码
y1_gene <- rownames(y1)
housekeeping <- which(y1_gene %in% gene_name)
y0 <- estimateDisp(y1[housekeeping,], trend="none", tagwise=FALSE)
y$common.dispersion <- y0$common.dispersion
design <- model.matrix(~group)
fit <- glmFit(y, design) # GLM 拟合
lrt <- glmLRT(fit) # 差异表达分析
gene3 <- decideTestsDGE(lrt, p.value = 0.05, lfc = 0) # 查看差异基因
summary(gene3)
# edgeR中,这种单因素两水平的分析,还可以使用一种称之为classic analysis的分析方法进行分析。
y0$samples$group=groups
et=exactTest(y0)
topTags(et) # 会显示 FDR值
参考网址
http://events.jianshu.io/p/c1036d39f2b9
网友评论