美文网首页ggplot集锦
使用isva(Independent Surrogate Var

使用isva(Independent Surrogate Var

作者: 机器人会画画 | 来源:发表于2019-06-23 16:54 被阅读0次

    Example先来一个测试例子热热身

    install.packages('isva')   #现在安装R包
    library(isva)  #现在加载R包
    
    ### load in simulated data  加载包自带模拟数据
    data(simdataISVA)
    head(simdataISVA)  #查看数据,simdataISVA是列表形式的数据集合
    
    $data
                    [,1]         [,2]         [,3]         [,4]         [,5]          [,6]
      [1,]  1.788456e-01 -0.322844432 -0.034752029  0.067617242 -0.248939154 -0.7186803614
      [2,] -9.112576e-01  0.073619160  1.123583483  0.091834393 -1.846308399 -0.1369259893
      [3,] -1.245029e-01  1.043559508  0.364985239 -0.182813643  0.269062771 -0.0545449373
      [4,]  1.722139e+00  0.720846841 -1.416458562  0.627775674 -0.149289959  0.2750975765
      [5,] -2.572381e-01 -0.193310256  0.603793970  0.402386004 -1.511308213 -1.5698217849
      [6,]  2.711833e+00  0.580914755  5.891540554  0.055144429  1.538017347 -0.3251643240
      [7,]  1.475850e+00 -0.104729193  0.911327146  0.040063194  0.131681506 -0.3467636728
      [8,] -2.622608e-01 -0.724034234  0.290951143 -1.327412045  3.110334554  2.8527455051
    [到达getOption("max.print") -- 略过***行]]
    
    $pheno
     [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    [48] 2 2 2
    
    $factors
    $factors[[1]]
     [1] 2 2 1 2 2 2 2 2 1 1 2 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 1 1 2 1 1 2 1 2 1 2 2 2 2 2 2 1 2 1 1 1 2
    [48] 2 1 2
    
    $factors[[2]]
     [1] 2 2 2 2 1 1 2 2 2 2 2 1 2 2 1 1 2 2 1 2 1 1 1 1 1 2 1 2 2 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 1 2
    [48] 1 2 1
    
    $deg
     [1] 337  51  96 515  28 345 315 271 203 185 495  68 288 494 225 688 558 266 589 132 198 128 306
    [24]   6 590 493 559 354 564 437 220  76 627 412  92 239 501 364  24 551 634 562 457 368 303 639
    [47] 333 521 614 565 650 698 150 183 749 376 616 719 535  55 426 283 243  11 584 213 236 291 561
    [70] 670 314 648 626 338 646
    
    $degL
    $degL$PHENO
     [1] 337  51  96 515  28 345 315 271 203 185 495  68 288 494 225 688 558 266 589 132 198 128 306
    [24]   6 590 493 559 354 564 437 220  76 627 412  92 239 501 364  24 551 634 562 457 368 303 639
    [47] 333 521 614 565 650 698 150 183 749 376 616 719 535  55 426 283 243  11 584 213 236 291 561
    [70] 670 314 648 626 338 646
    
    $degL$CF1
     [1] 337 150 183  11 562   6  24 314 670 749 639 646 306 719 698 437 561 584 162  83 165 477 211
    [24]  31 120 498 606 167 613 114 115 629 636 262 640 740  48 595 269 686 305 737  98 202 310 661
    [47] 505 122 484 654 599 216 485  36 158 449 276 252 282 542 195 446 645 229 434 642 577 355 549
    [70] 113  91 601 253 676 359
    
    $degL$CF2
     [1] 495 376 306 616 426 266  24  76 345 220 437  68 634 291 457 368 650 614 713 527  61 214 529
    [24] 349 275 340 680 681 392 623 223 666  35 400 325 610 192 625 146 725 274 451 407   8 212 138
    [47] 270 478 518 123 231  82 474 173 152 421 574 289 237 539 534 525 447  84 394 471 247 710 104
    [70]  53 544  15 440  39 362
    
    
    data.m <- simdataISVA$data
    pheno.v <- simdataISVA$pheno
    
    ####factors matrix (two potential confounding factors, e.g chip and cohort)  因子矩阵(两个潜在的混杂因素,例如芯片和队列)
    factors.m<-cbind(simdataISVA$factors[[1]],simdataISVA$factors[[2]])  #按列合并2个混杂因素
    colnames(factors.m) <- c("CF1","CF2")   #给factors.m起个列名,第一列叫CF1,第二列叫CF2
    
    
    ###Estimate number of significant components of variation估计变异的重要组成部分的数量
    rmt.o <- EstDimRMT(data.m)
    print(paste("Number of significant components=",rmt.o$dim,sep=""))
    [1] "Number of significant components=3"
    ### this makes sense since 1 component is associated with the
    ### the phenotype of interest, while the other two are associated
    ### with the confounders  这是有道理的,因为1个组分与感兴趣的表型相关,而其他两个组分与混杂因子相关
    ncp <- rmt.o$dim-1 
    ### Do ISVA
    ### run with the confounders as given  与给定的混杂因素一起运行
    isva.o <- DoISVA(data.m,pheno.v,factors.m,factor.log=rep(FALSE,2), pvthCF=0.01,th=0.05,ncomp=ncp,icamethod="fastICA")
    ### Evaluation (ISVs should correlate with confounders) 评估(ISV应与混杂因素相关)
    ### modeling of CFs    CF的建模
    print(cor(isva.o$isv,factors.m))
    
                CF1        CF2
    [1,]  0.9788975 -0.2131201
    [2,] -0.1618752  0.9795638
    
    
    ### this shows that CFs are reconstructed fairly well
    ### sensitivity (fraction of detected true positives)#这表明CFs的重建灵敏度相当高(检测到真阳性的比例)
    print(length(intersect(isva.o$deg,simdataISVA$deg))/length(simdataISVA$deg))
    [1] 0.4133333
    ### PPV (1-false discovery rate)  1-FDR
    print(length(intersect(isva.o$deg,simdataISVA$deg))/length(isva.o$deg))
    [1] 0.9393939
    ### run not knowing what confounders there are and with ncp=3 say.####不知道有什么混杂因素,并且用ncp = 3
    isva2.o <- DoISVA(data.m,pheno.v,cf.m=NULL,factor.log=rep(FALSE,2),
                                  pvthCF=0.01,th=0.05,ncomp=3,icamethod="fastICA")
    ### sensitivity (fraction of detected true positives)检测到的真阳性的一部分
    print(length(intersect(isva2.o$deg,simdataISVA$deg))/length(simdataISVA$deg))
    [1] 0.4133333
    ### PPV (1-false discovery rate)1-FDR
    print(length(intersect(isva2.o$deg,simdataISVA$deg))/length(isva2.o$deg))
    [1] 0.9393939
    
    

    最近刚开始学习使用ICA进行甲基化数据的分析,参考了很多资料,主要是一些软件的官方文档,和isva被引用的文献,在这里记录下自己的学习历程,也希望对大家有帮助。
    下一篇将详细介绍isva包的具体使用说明~

    Reference:
    https://cran.r-project.org/web/packages/isva/isva.pdf
    https://rdrr.io/cran/isva/

    转载请联系作者!

    相关文章

      网友评论

        本文标题:使用isva(Independent Surrogate Var

        本文链接:https://www.haomeiwen.com/subject/xgtwqctx.html