1.示例数据
# Load the packages and the example data used below.
# NOTE: the former rm(list = ls()) call was dropped. It silently deletes
# everything in the reader's workspace and still does not give a clean
# session (attached packages and options persist); restart R for that.
library(randomForestSRC)
library(survival)
# vdv is an example data set shipped with randomForestSRC.
data(vdv, package = "randomForestSRC")
dim(vdv)
## [1] 78 4707
# Preview the first five rows and the first five columns of vdv.
vdv[seq_len(5), seq_len(5)]
## Time Censoring AA555029_RC AA598803_RC AB002301
## 1 12.53 0 -0.5049331 -0.2425008 -0.199315682
## 2 6.44 0 -0.5879813 0.4384945 -0.621200562
## 3 10.66 0 -0.3521244 -0.2258911 0.006643856
## 4 13.00 0 -0.4750357 0.5016111 -0.671029449
## 5 11.98 0 -0.1660964 0.1361991 0.989934564
# Box plots of columns 3 to 10 of vdv.
boxplot(vdv[3:10])
![vdv 第 3–10 列的箱线图](https://img.haomeiwen.com/i9475888/386848058321a320.png)
2.代码
这里使用的是官方文档 https://cran.r-project.org/web/packages/randomForestSRC/randomForestSRC.pdf 中 var.select 的示例。
该示例与 Cox 回归结合:按照单因素 Cox 回归的 p 值为每个变量设置权重。
# Compute one selection weight per predictor from univariate Cox models.
#
# Arguments:
#   rfsrc.f    - survival formula; its first two variables name the time and
#                event-status columns (e.g. Surv(Time, Censoring) ~ .).
#   rfsrc.data - data frame holding those two columns plus the predictors.
#
# Returns a numeric vector with one weight per predictor column, in column
# order: 1 / (p + 1e-100), where p is the predictor's p-value in a Cox model
# fitted on that predictor alone, or 1 when that p-value is NA. With no
# predictor columns the result is numeric(0).
cox.weights <- function(rfsrc.f, rfsrc.data) {
  event.names <- all.vars(rfsrc.f)[1:2]
  event.pt <- match(event.names, names(rfsrc.data))
  xvar.pt <- setdiff(seq_len(ncol(rfsrc.data)), event.pt)
  # seq_along() (unlike 1:p) performs no iterations when there are no
  # predictors, and vapply() always returns a numeric vector.
  vapply(seq_along(xvar.pt), function(j) {
    cox.out <- coxph(rfsrc.f, rfsrc.data[, c(event.pt, xvar.pt[j])])
    # Look the p-value up by column name instead of by position, so extra
    # rows or columns in the coefficient table cannot shift the lookup.
    pvalue <- summary(cox.out)$coefficients[1, "Pr(>|z|)"]
    # The tiny offset keeps the weight finite when the p-value is exactly 0.
    if (is.na(pvalue)) 1.0 else 1 / (pvalue + 1e-100)
  }, numeric(1))
}
# Survival formula: Time and Censoring form the response, and "." stands for
# the remaining columns of whatever data set the formula is applied to.
rfsrc.f <- Surv(Time, Censoring) ~ .
rfsrc.f
## Surv(Time, Censoring) ~ .
cox.weights 函数根据单因素 Cox 回归的 p 值计算每个变量的权重(权重为 1/p,p 值越小权重越大);rfsrc.f 是生存模型公式。
# Univariate-Cox weights, one per predictor column of vdv.
cox.wts <- cox.weights(rfsrc.f = rfsrc.f, rfsrc.data = vdv)
# Left disabled: fitting a forest directly with these weights.
# vh.model <- rfsrc(rfsrc.f, vdv, nsplit = 10,
#                   xvar.wt = cox.wts, importance = "random",
#                   na.action = "na.impute", ntree = 1000)
变量选择
# Variable selection with method "vh", passing the Cox-derived weights as
# xvar.wt.
vh.breast.cox <- var.select(
  formula = rfsrc.f,
  data = vdv,
  method = "vh",
  nstep = 5,
  nrep = 10,
  xvar.wt = cox.wts
)
## --------------------- Iteration: 1 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 7 joint-vimp: 0.093
## iteration: 2 # vars: 12 joint-vimp: 0.152
## iteration: 3 # vars: 17 joint-vimp: 0.176
## iteration: 4 # vars: 22 joint-vimp: 0.193
## iteration: 5 # vars: 27 joint-vimp: 0.192
## PE: 23.1884 dim: 22
## --------------------- Iteration: 2 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 41 joint-vimp: 0.263
## iteration: 2 # vars: 44 joint-vimp: 0.264
## iteration: 3 # vars: 48 joint-vimp: 0.257
## PE: 27.381 dim: 44
## --------------------- Iteration: 3 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 36 joint-vimp: 0.255
## iteration: 2 # vars: 39 joint-vimp: 0.251
## PE: 29.7619 dim: 36
## --------------------- Iteration: 4 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 11 joint-vimp: 0.256
## iteration: 2 # vars: 16 joint-vimp: 0.286
## iteration: 3 # vars: 20 joint-vimp: 0.291
## iteration: 4 # vars: 25 joint-vimp: 0.294
## iteration: 5 # vars: 30 joint-vimp: 0.302
## iteration: 6 # vars: 35 joint-vimp: 0.3
## PE: 38.6905 dim: 30
## --------------------- Iteration: 5 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 39 joint-vimp: 0.186
## iteration: 2 # vars: 42 joint-vimp: 0.186
## iteration: 3 # vars: 46 joint-vimp: 0.19
## iteration: 4 # vars: 49 joint-vimp: 0.196
## iteration: 5 # vars: 52 joint-vimp: 0.196
## iteration: 6 # vars: 56 joint-vimp: 0.192
## PE: 14.4928 dim: 52
## --------------------- Iteration: 6 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 38 joint-vimp: 0.27
## iteration: 2 # vars: 41 joint-vimp: 0.277
## iteration: 3 # vars: 45 joint-vimp: 0.301
## iteration: 4 # vars: 48 joint-vimp: 0.302
## iteration: 5 # vars: 52 joint-vimp: 0.302
## iteration: 6 # vars: 55 joint-vimp: 0.3
## PE: 46.4286 dim: 52
## --------------------- Iteration: 7 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 10 joint-vimp: 0.139
## iteration: 2 # vars: 15 joint-vimp: 0.166
## iteration: 3 # vars: 20 joint-vimp: 0.178
## iteration: 4 # vars: 24 joint-vimp: 0.178
## iteration: 5 # vars: 29 joint-vimp: 0.191
## iteration: 6 # vars: 34 joint-vimp: 0.199
## iteration: 7 # vars: 38 joint-vimp: 0.192
## PE: 25 dim: 34
## --------------------- Iteration: 8 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 47 joint-vimp: 0.233
## iteration: 2 # vars: 50 joint-vimp: 0.236
## iteration: 3 # vars: 53 joint-vimp: 0.235
## PE: 15.873 dim: 50
## --------------------- Iteration: 9 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 9 joint-vimp: 0.193
## iteration: 2 # vars: 14 joint-vimp: 0.219
## iteration: 3 # vars: 19 joint-vimp: 0.233
## iteration: 4 # vars: 24 joint-vimp: 0.231
## PE: 22.619 dim: 19
## --------------------- Iteration: 10 ---------------------
## selecting variables using Variable Hunting ...
## iteration: 1 # vars: 8 joint-vimp: 0.177
## iteration: 2 # vars: 13 joint-vimp: 0.219
## iteration: 3 # vars: 18 joint-vimp: 0.225
## iteration: 4 # vars: 23 joint-vimp: 0.238
## iteration: 5 # vars: 28 joint-vimp: 0.241
## iteration: 6 # vars: 33 joint-vimp: 0.25
## iteration: 7 # vars: 37 joint-vimp: 0.249
## PE: 34.5238 dim: 33
## fitting forests to final selected variables ...
##
##
## -----------------------------------------------------------
## family : surv
## var. selection : Variable Hunting
## conservativeness : medium
## dimension : 4705
## sample size : 78
## K-fold : 5
## no. reps : 10
## nstep : 5
## ntree : 500
## nsplit : 10
## mvars : 942
## nodesize : 2
## refitted forest : TRUE
## depth ratio : 3.1277
## model size : 37.2 +/- 11.9796
## PE (K-fold) : 27.7959 +/- 9.9451
##
##
## Top variables:
## depth rel.freq
## AL080059 7.653 100
## NM_005915 7.548 100
## NM_016448 7.668 100
## AA555029_RC 7.827 90
## Contig35251_RC 7.712 90
## NM_006201 7.786 90
## NM_006681 7.890 90
## NM_015239 7.983 90
## Contig43983_RC 7.667 70
## Contig46223_RC 7.916 70
## Contig51464_RC 7.670 70
## NM_001216 8.008 70
## NM_006931 8.091 70
## NM_016577 8.087 70
## Contig44409 7.872 60
## Contig47405_RC 8.154 60
## NM_000436 8.181 60
## NM_020974 8.209 60
## Contig20217_RC 8.109 50
## Contig32185_RC 8.169 50
## Contig48328_RC 8.258 50
## Contig54742_RC 8.208 50
## NM_014246 7.750 50
## NM_018265 8.225 50
## AB020689 8.251 40
## AF052162 8.246 40
## Contig55377_RC 7.957 40
## NM_000507 8.274 40
## NM_004504 8.219 40
## NM_018354 8.284 40
## NM_020142 7.918 40
## AL137718 8.330 30
## Contig25343_RC 8.429 30
## Contig38726_RC 8.354 30
## Contig53268_RC 8.047 30
## Contig63102_RC 7.432 30
## NM_001673 8.368 30
## NM_014968 8.421 30
## -----------------------------------------------------------
# nrep sets how many times the variable-selection procedure is repeated; a
# real analysis can use a larger value such as 1000, at the cost of run time.
vh.breast.cox[["topvars"]]
## [1] "AL080059" "NM_005915" "NM_016448" "AA555029_RC"
## [5] "Contig35251_RC" "NM_006201" "NM_006681" "NM_015239"
## [9] "Contig43983_RC" "Contig46223_RC" "Contig51464_RC" "NM_001216"
## [13] "NM_006931" "NM_016577" "Contig44409" "Contig47405_RC"
## [17] "NM_000436" "NM_020974" "Contig20217_RC" "Contig32185_RC"
## [21] "Contig48328_RC" "Contig54742_RC" "NM_014246" "NM_018265"
## [25] "AB020689" "AF052162" "Contig55377_RC" "NM_000507"
## [29] "NM_004504" "NM_018354" "NM_020142" "AL137718"
## [33] "Contig25343_RC" "Contig38726_RC" "Contig53268_RC" "Contig63102_RC"
## [37] "NM_001673" "NM_014968"
随机生存森林没有常规意义上的系数,也没有 Cox 模型那样的公式;它可以像 lasso 回归一样用于变量筛选、缩小候选变量的范围,但不宜用作建模的最后一步。
网友评论