Example先来一个测试例子热热身
install.packages('isva') #现在安装R包
library(isva) #现在加载R包
### load in simulated data 加载包自带模拟数据
data(simdataISVA)
head(simdataISVA) #查看数据,simdataISVA是列表形式的数据集合
$data
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1.788456e-01 -0.322844432 -0.034752029 0.067617242 -0.248939154 -0.7186803614
[2,] -9.112576e-01 0.073619160 1.123583483 0.091834393 -1.846308399 -0.1369259893
[3,] -1.245029e-01 1.043559508 0.364985239 -0.182813643 0.269062771 -0.0545449373
[4,] 1.722139e+00 0.720846841 -1.416458562 0.627775674 -0.149289959 0.2750975765
[5,] -2.572381e-01 -0.193310256 0.603793970 0.402386004 -1.511308213 -1.5698217849
[6,] 2.711833e+00 0.580914755 5.891540554 0.055144429 1.538017347 -0.3251643240
[7,] 1.475850e+00 -0.104729193 0.911327146 0.040063194 0.131681506 -0.3467636728
[8,] -2.622608e-01 -0.724034234 0.290951143 -1.327412045 3.110334554 2.8527455051
[到达getOption("max.print") -- 略过***行]]
$pheno
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[48] 2 2 2
$factors
$factors[[1]]
[1] 2 2 1 2 2 2 2 2 1 1 2 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 1 1 2 1 1 2 1 2 1 2 2 2 2 2 2 1 2 1 1 1 2
[48] 2 1 2
$factors[[2]]
[1] 2 2 2 2 1 1 2 2 2 2 2 1 2 2 1 1 2 2 1 2 1 1 1 1 1 2 1 2 2 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 1 2
[48] 1 2 1
$deg
[1] 337 51 96 515 28 345 315 271 203 185 495 68 288 494 225 688 558 266 589 132 198 128 306
[24] 6 590 493 559 354 564 437 220 76 627 412 92 239 501 364 24 551 634 562 457 368 303 639
[47] 333 521 614 565 650 698 150 183 749 376 616 719 535 55 426 283 243 11 584 213 236 291 561
[70] 670 314 648 626 338 646
$degL
$degL$PHENO
[1] 337 51 96 515 28 345 315 271 203 185 495 68 288 494 225 688 558 266 589 132 198 128 306
[24] 6 590 493 559 354 564 437 220 76 627 412 92 239 501 364 24 551 634 562 457 368 303 639
[47] 333 521 614 565 650 698 150 183 749 376 616 719 535 55 426 283 243 11 584 213 236 291 561
[70] 670 314 648 626 338 646
$degL$CF1
[1] 337 150 183 11 562 6 24 314 670 749 639 646 306 719 698 437 561 584 162 83 165 477 211
[24] 31 120 498 606 167 613 114 115 629 636 262 640 740 48 595 269 686 305 737 98 202 310 661
[47] 505 122 484 654 599 216 485 36 158 449 276 252 282 542 195 446 645 229 434 642 577 355 549
[70] 113 91 601 253 676 359
$degL$CF2
[1] 495 376 306 616 426 266 24 76 345 220 437 68 634 291 457 368 650 614 713 527 61 214 529
[24] 349 275 340 680 681 392 623 223 666 35 400 325 610 192 625 146 725 274 451 407 8 212 138
[47] 270 478 518 123 231 82 474 173 152 421 574 289 237 539 534 525 447 84 394 471 247 710 104
[70] 53 544 15 440 39 362
data.m <- simdataISVA$data
pheno.v <- simdataISVA$pheno
####factors matrix (two potential confounding factors, e.g chip and cohort) 因子矩阵(两个潜在的混杂因素,例如芯片和队列)
factors.m<-cbind(simdataISVA$factors[[1]],simdataISVA$factors[[2]]) #按列合并2个混杂因素
colnames(factors.m) <- c("CF1","CF2") #给factors.m起个列名,第一列叫CF1,第二列叫CF2
###Estimate number of significant components of variation估计变异的重要组成部分的数量
rmt.o <- EstDimRMT(data.m)
print(paste("Number of significant components=",rmt.o$dim,sep=""))
[1] "Number of significant components=3"
### this makes sense since 1 component is associated with the
### the phenotype of interest, while the other two are associated
### with the confounders 这是有道理的,因为1个组分与感兴趣的表型相关,而其他两个组分与混杂因子相关
ncp <- rmt.o$dim-1
### Do ISVA
### run with the confounders as given 与给定的混杂因素一起运行
isva.o <- DoISVA(data.m,pheno.v,factors.m,factor.log=rep(FALSE,2), pvthCF=0.01,th=0.05,ncomp=ncp,icamethod="fastICA")
### Evaluation (ISVs should correlate with confounders) 评估(ISV应与混杂因素相关)
### modeling of CFs CF的建模
print(cor(isva.o$isv,factors.m))
CF1 CF2
[1,] 0.9788975 -0.2131201
[2,] -0.1618752 0.9795638
### this shows that CFs are reconstructed fairly well
### sensitivity (fraction of detected true positives)#这表明CFs的重建灵敏度相当高(检测到真阳性的比例)
print(length(intersect(isva.o$deg,simdataISVA$deg))/length(simdataISVA$deg))
[1] 0.4133333
### PPV (1-false discovery rate) 1-FDR
print(length(intersect(isva.o$deg,simdataISVA$deg))/length(isva.o$deg))
[1] 0.9393939
### run not knowing what confounders there are and with ncp=3 say.####不知道有什么混杂因素,并且用ncp = 3
isva2.o <- DoISVA(data.m,pheno.v,cf.m=NULL,factor.log=rep(FALSE,2),
pvthCF=0.01,th=0.05,ncomp=3,icamethod="fastICA")
### sensitivity (fraction of detected true positives)检测到的真阳性的一部分
print(length(intersect(isva2.o$deg,simdataISVA$deg))/length(simdataISVA$deg))
[1] 0.4133333
### PPV (1-false discovery rate)1-FDR
print(length(intersect(isva2.o$deg,simdataISVA$deg))/length(isva2.o$deg))
[1] 0.9393939
最近刚开始学习使用ICA进行甲基化数据的分析,参考了很多资料,主要是一些软件的官方文档,和isva被引用的文献,在这里记录下自己的学习历程,也希望对大家有帮助。
下一篇将详细介绍isva包的具体使用说明~
Reference:
https://cran.r-project.org/web/packages/isva/isva.pdf
https://rdrr.io/cran/isva/
转载请联系作者!
网友评论