> library(edgeR)
> library(limma)
> setwd("D:/")
> expr_matrix <- read.csv("transcript_count_matrix.csv")
> head(expr_matrix)
transcript_id CK18 CK69 T18 T69
1 MSTRG.18752.1 1214 724 918 581
2 MSTRG.11041.1 1458 1317 540 302
3 MSTRG.21591.1 2801 502 446 359
4 MSTRG.14117.1 792 1168 624 1569
5 MSTRG.7407.1 486 1492 1855 2097
6 MSTRG.7407.3 390 1168 228 764
> counts <- expr_matrix[,2:5]
> group <- c("CK", "CK", "T", "T")
> transcript <- expr_matrix[,1]
> y <- DGEList(counts=counts, genes =transcript, group = group)
> y
An object of class "DGEList"
$counts
CK18 CK69 T18 T69
1 1214 724 918 581
2 1458 1317 540 302
3 2801 502 446 359
4 792 1168 624 1569
5 486 1492 1855 2097
30451 more rows ...
$samples
group lib.size norm.factors
CK18 CK 89473310 1
CK69 CK 70278971 1
T18 T 81813431 1
T69 T 76984977 1
$genes
genes
1 MSTRG.18752.1
2 MSTRG.11041.1
3 MSTRG.21591.1
4 MSTRG.14117.1
5 MSTRG.7407.1
30451 more rows ...
> keep <- filterByExpr(y)
> table(keep)
keep
FALSE TRUE
1018 29438
> y <- y[keep, , keep.lib.sizes=FALSE]
> y <- calcNormFactors(y)
> y$samples
group lib.size norm.factors
CK18 CK 89017858 1.0064240
CK69 CK 70130046 0.9560699
T18 T 81558247 0.9301324
T69 T 76576585 1.1173382
> plotMDS(y, col=rep(1:2, each=2))
> plotMDS(y)
> y <- estimateCommonDisp(y, verbose=TRUE)
Disp = 0.73806 , BCV = 0.8591
> plotBCV(y)
> et <- exactTest(y)
> top <- topTags(et)
> top
Comparison of groups: T-CK
genes logFC logCPM PValue
26756 MSTRG.20829.1 -17.10412 6.791271 2.870725e-11
5422 MSTRG.19982.1 -16.02954 5.717000 2.160155e-10
26380 MSTRG.7784.2 15.81060 5.498230 3.258128e-10
22792 MSTRG.17835.2 -12.80841 8.573027 6.830962e-10
5092 MSTRG.13851.3 15.39324 5.081155 7.133885e-10
10520 MSTRG.13698.3 15.31394 5.001918 8.279609e-10
1605 MSTRG.14269.1 15.27153 4.959513 8.965476e-10
30441 MSTRG.14683.2 15.00903 4.697222 1.467732e-09
23906 MSTRG.10097.2 14.96704 4.655322 1.588126e-09
21599 MSTRG.5987.1 14.75320 4.441690 2.372516e-09
FDR
26756 8.450840e-07
5422 3.179533e-06
26380 3.197092e-06
22792 3.770367e-06
5092 3.770367e-06
10520 3.770367e-06
1605 3.770367e-06
30441 5.194585e-06
23906 5.194585e-06
21599 6.984212e-06
> summary(de <- decideTestsDGE(et));
T-CK
Down 125
NotSig 29181
Up 132
> detags <- rownames(y)[as.logical(de)]
> plotSmear(et, de.tags=detags)
> abline(h=c(-1, 1), col="blue")
> abline(h=c(-1, 1), col="blue")
>
> abline(h=c(-1, 1), col="blue")
>
> design <- model.matrix(~0+group);design
groupCK groupT
1 1 0
2 1 0
3 0 1
4 0 1
attr(,"assign")
[1] 1 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
> y <- estimateDisp(y, design, robust=TRUE)
Error in fitFDistRobustly(var, df1 = df, covariate = covariate, winsor.tail.p = winsor.tail.p) :
statmod package required but is not installed
> install.packages("statmod")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
试开URL’https://cran.rstudio.com/bin/windows/contrib/3.6/statmod_1.4.34.zip'
Content type 'application/zip' length 285669 bytes (278 KB)
downloaded 278 KB
package ‘statmod’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\yize\AppData\Local\Temp\RtmpaqoJul\downloaded_packages
> y <- estimateDisp(y, design, robust=TRUE)
> y$common.dispersion
[1] 0.7017119
> plotBCV(y)
> fit <- glmQLFit(y, design, robust=TRUE)
> plotQLDisp(fit)
> qlf <- glmQLFTest(fit, coef=2:3)
Error in glmfit$coefficients[, coef, drop = FALSE] : 下标出界
> qlf <- glmQLFTest(fit, coef=1:2)
> topTags(qlf)
Coefficient: groupCK groupT
genes logFC.groupCK logFC.groupT logCPM F
19773 MSTRG.8349.1 -14.41772 -13.32484 6.161872 2646.042
27250 MSTRG.17553.1 -13.71611 -13.99838 6.081715 2639.279
3586 MSTRG.2987.1 -14.56360 -13.37351 6.082907 2637.124
7921 MSTRG.15227.1 -13.69366 -13.70289 6.233752 2636.310
28242 MSTRG.6527.1 -13.75446 -13.72020 6.194808 2631.706
8901 MSTRG.13011.1 -13.70567 -13.74024 6.209180 2625.411
26265 MSTRG.3120.1 -13.62765 -13.75216 6.243455 2623.619
21134 MSTRG.12588.2 -13.58061 -13.57212 6.355626 2616.077
12415 MSTRG.21479.1 -13.98569 -13.99914 5.939728 2606.411
11524 MSTRG.19348.1 -13.66187 -14.18647 6.031634 2601.578
PValue FDR
19773 2.038534e-06 5.510429e-05
27250 2.047919e-06 5.510429e-05
3586 2.050923e-06 5.510429e-05
7921 2.052060e-06 5.510429e-05
28242 2.058508e-06 5.510429e-05
8901 2.067375e-06 5.510429e-05
26265 2.069909e-06 5.510429e-05
21134 2.080631e-06 5.510429e-05
12415 2.094500e-06 5.510429e-05
11524 2.101488e-06 5.510429e-05
> FDR <- p.adjust(qlf$table$PValue, method="BH")
> sum(FDR < 0.05)
[1] 29419
> qlf <- glmQLFTest(fit)
> topTags(qlf)
Coefficient: groupT
genes logFC logCPM F PValue
11275 MSTRG.10568.1 -15.25428 6.293569 2705.323 2.671557e-06
27250 MSTRG.17553.1 -13.99838 6.081715 2669.282 2.736616e-06
11524 MSTRG.19348.1 -14.18647 6.031634 2656.249 2.760753e-06
7921 MSTRG.15227.1 -13.70289 6.233752 2637.304 2.796431e-06
26265 MSTRG.3120.1 -13.75216 6.243455 2636.954 2.797097e-06
8901 MSTRG.13011.1 -13.74024 6.209180 2629.106 2.812094e-06
28242 MSTRG.6527.1 -13.72020 6.194808 2628.039 2.814141e-06
21134 MSTRG.12588.2 -13.57212 6.355626 2615.163 2.839046e-06
10440 MSTRG.820.1 -13.63421 6.383175 2609.525 2.850059e-06
7872 MSTRG.406.1 -13.58699 6.413094 2607.847 2.853350e-06
FDR
11275 7.542372e-05
27250 7.542372e-05
11524 7.542372e-05
7921 7.542372e-05
26265 7.542372e-05
8901 7.542372e-05
28242 7.542372e-05
21134 7.542372e-05
10440 7.542372e-05
7872 7.542372e-05
> summary(decideTests(qlf))
groupT
Down 29367
NotSig 71
Up 0
代码
library(edgeR)
setwd("D:/")
expr_matrix <- read.csv("transcript_count_matrix.csv")
head(expr_matrix)
counts <- expr_matrix[,2:5]
group <- c("CK", "CK", "T", "T")
transcript <- expr_matrix[,1]
y <- DGEList(counts=counts, genes =transcript, group = group)
y
keep <- filterByExpr(y)
table(keep)
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
y$samples
plotMDS(y)
y <- estimateCommonDisp(y, verbose=TRUE)
plotBCV(y)
et <- exactTest(y)
top <- topTags(et)
top
summary(de <- decideTestsDGE(et))
detags <- rownames(y)[as.logical(de)]
plotSmear(et, de.tags=detags)
abline(h=c(-1, 1), col="blue")
design <- model.matrix(~0+group);design
y <- estimateDisp(y, design, robust=TRUE)
y$common.dispersion
plotBCV(y)
fit <- glmQLFit(y, design, robust=TRUE)
plotQLDisp(fit)
install.packages statmod
library(limma)
qlf <- glmQLFTest(fit, coef=2)
topTags(qlf)
FDR <- p.adjust(qlf$table$PValue, method="BH")
sum(FDR < 0.05)
qlf <- glmQLFTest(fit)
topTags(qlf)
summary(decideTests(qlf))
网友评论