5.1.1 edgeR

作者: YZZZZZero | 来源:发表于2022-05-07 15:51 被阅读0次

5.1.1 edgeR
edgeR
edgeR 包——学习
Week 12
13高通量测序-edgeR文库标准化
Statquest笔记2—edgeR (No.59)
DEGSeq edgeR DESeq
edgeR差异分析
Guideline 5.1.1 被拒
苹果审核被拒绝，4.2,5.1,4.8

样本无重复与DESeq2的对比如下参考文章：

https://www.jianshu.com/p/517167c50a5f

参考文章：

https://www.jianshu.com/p/4863beb3abf9
https://blog.csdn.net/u012110870/article/details/102804557

安装R包

if (!requireNamespace("edgeR", quietly = TRUE))
    BiocManager::install("edgeR")

构建DGEList，前期需准备count矩阵，未经过标准化的原始读数

setwd("D:/")
expr_matrix <- read.csv("CK_count_matrix.csv")
head(expr_matrix)
counts <- expr_matrix[,2:3]
group <- 1:2
y <- DGEList(counts=counts, group = group)
y
##An object of class "DGEList"
$counts
  CK18 CK69
1 1214  724
2 1458 1317
3 2801  502
4  792 1168
5  486 1492
30451 more rows ...

$samples
     group lib.size norm.factors
CK18     1 89473310            1
CK69     2 70278971            1

过滤

keep <- rowSums(cpm(y)>1) >= 1
y <- y[keep, , keep.lib.sizes=FALSE]
y
#An object of class "DGEList"
#$counts
#  CK18 CK69
#1 1214  724
#2 1458 1317
#3 2801  502
#4  792 1168
#5  486 1492
#27624 more rows ...

$samples
     group lib.size norm.factors
CK18     1 89403338            1
CK69     2 70219884            1
#标准化
> y <- calcNormFactors(y)

差异表达分析

bcv <- 0.1
et <- exactTest(y_bcv, dispersion = bcv ^ 2)
gene1 <- decideTestsDGE(et, p.value = 0.05, lfc = 0)
summary(gene1)
#         2-1
#Down    6462
#NotSig 15076
#Up      6091

有重复分析

library(edgeR)
setwd("D:/")
expr_matrix <- read.csv("transcript_count_matrix.csv")
head(expr_matrix)
counts <- expr_matrix[,2:5]
group <- c("CK", "CK", "T", "T")
transcript <- expr_matrix[,1]
y <- DGEList(counts=counts, genes =transcript, group = group)
y

keep <- rowSums(cpm(y)>1) >= 1
y <- y[keep, , keep.lib.sizes=FALSE]
y

designMat <- model.matrix(~0+group);designMat
plotMDS(y, col=as.numeric(y$samples$group))
legend("bottomleft",as.character(unique(y$samples$group)), col=1:3, pch=20)

y <- estimateGLMCommonDisp(y,design=designMat)
y <- estimateGLMTrendedDisp(y, design=designMat)
y <- estimateGLMTagwiseDisp(y, design=designMat)
plotBCV(y)
fit <- glmFit(y, designMat)
lrt.1vs2 <- glmLRT(fit, contrast = c(1,0))
degs.res.1vs2 <- topTags(lrt.1vs2, n = Inf, adjust.method = 'BH', sort.by = 'PValue')
degs.res.1vs2[1:5, ]

deGenes.1vs2 <- decideTestsDGE(lrt.1vs2, p=0.05, lfc = 1)
summary(deGenes.1vs2)

detag <- rownames(lrt.1vs2)[as.logical(deGenes.1vs2)]
plotSmear(lrt.1vs2, de.tags=detag)
abline(h=c(-1, 1), col='blue')

countsPerMillion <- cpm(y)
countCheck <- countsPerMillion > 1
keep <- which(rowSums(countCheck) >= 2)
y.keep <- y[keep,]
dim(y.keep)

y.norm <- calcNormFactors(y.keep, method = 'TMM')
plotMDS(y.norm, col=as.numeric(y.norm $samples$ group))
legend("bottomleft",as.character(unique(y.norm $samples$ group)), col=1:3, pch=20)