样本无重复与DESeq2的对比如下参考文章:
参考文章:
https://www.jianshu.com/p/4863beb3abf9
https://blog.csdn.net/u012110870/article/details/102804557
安装R包
if (!requireNamespace("edgeR", quietly = TRUE))
BiocManager::install("edgeR")
构建DGEList,前期需准备count矩阵,未经过标准化的原始读数
setwd("D:/")
expr_matrix <- read.csv("CK_count_matrix.csv")
head(expr_matrix)
counts <- expr_matrix[,2:3]
group <- 1:2
y <- DGEList(counts=counts, group = group)
y
##An object of class "DGEList"
$counts
CK18 CK69
1 1214 724
2 1458 1317
3 2801 502
4 792 1168
5 486 1492
30451 more rows ...
$samples
group lib.size norm.factors
CK18 1 89473310 1
CK69 2 70278971 1
过滤
keep <- rowSums(cpm(y)>1) >= 1
y <- y[keep, , keep.lib.sizes=FALSE]
y
#An object of class "DGEList"
#$counts
# CK18 CK69
#1 1214 724
#2 1458 1317
#3 2801 502
#4 792 1168
#5 486 1492
#27624 more rows ...
$samples
group lib.size norm.factors
CK18 1 89403338 1
CK69 2 70219884 1
#标准化
> y <- calcNormFactors(y)
差异表达分析
bcv <- 0.1
et <- exactTest(y_bcv, dispersion = bcv ^ 2)
gene1 <- decideTestsDGE(et, p.value = 0.05, lfc = 0)
summary(gene1)
# 2-1
#Down 6462
#NotSig 15076
#Up 6091
有重复分析
library(edgeR)
setwd("D:/")
expr_matrix <- read.csv("transcript_count_matrix.csv")
head(expr_matrix)
counts <- expr_matrix[,2:5]
group <- c("CK", "CK", "T", "T")
transcript <- expr_matrix[,1]
y <- DGEList(counts=counts, genes =transcript, group = group)
y
keep <- rowSums(cpm(y)>1) >= 1
y <- y[keep, , keep.lib.sizes=FALSE]
y
designMat <- model.matrix(~0+group);designMat
plotMDS(y, col=as.numeric(y$samples$group))
legend("bottomleft",as.character(unique(y$samples$group)), col=1:3, pch=20)
y <- estimateGLMCommonDisp(y,design=designMat)
y <- estimateGLMTrendedDisp(y, design=designMat)
y <- estimateGLMTagwiseDisp(y, design=designMat)
plotBCV(y)
fit <- glmFit(y, designMat)
lrt.1vs2 <- glmLRT(fit, contrast = c(1,0))
degs.res.1vs2 <- topTags(lrt.1vs2, n = Inf, adjust.method = 'BH', sort.by = 'PValue')
degs.res.1vs2[1:5, ]
deGenes.1vs2 <- decideTestsDGE(lrt.1vs2, p=0.05, lfc = 1)
summary(deGenes.1vs2)
detag <- rownames(lrt.1vs2)[as.logical(deGenes.1vs2)]
plotSmear(lrt.1vs2, de.tags=detag)
abline(h=c(-1, 1), col='blue')
countsPerMillion <- cpm(y)
countCheck <- countsPerMillion > 1
keep <- which(rowSums(countCheck) >= 2)
y.keep <- y[keep,]
dim(y.keep)
y.norm <- calcNormFactors(y.keep, method = 'TMM')
plotMDS(y.norm, col=as.numeric(y.normgroup))
legend("bottomleft",as.character(unique(y.normgroup)), col=1:3, pch=20)
网友评论