1.数据
# Load the packages and the Sonar example data set shipped with mlbench.
# Note: the workspace is deliberately NOT cleared with rm(list = ls()); a
# script should not wipe objects the user already has in their session.
library(caret)
library(mlbench)
data(Sonar)
dat <- Sonar
# Optional recoding of the outcome to 0/1 (left disabled; the factor is used).
#dat$Class = ifelse(dat$Class=="M",0,1)
#dat$Class = factor(dat$Class)
# Peek at the first four rows and columns.
dat[1:4, 1:4]
## V1 V2 V3 V4
## 1 0.0200 0.0371 0.0428 0.0207
## 2 0.0453 0.0523 0.0843 0.0689
## 3 0.0262 0.0582 0.1099 0.1083
## 4 0.0100 0.0171 0.0623 0.0205
# Number of rows and columns in the data set.
c(nrow(dat), ncol(dat))
## [1] 208 61
# Number of observations in each outcome class.
table(dat[["Class"]])
##
## M R
## 111 97
这个数据是mlbench包里的内置数据,共208行、61列。前60列是变量,最后一列(Class)是分类,因子型的。
2.拆分训练集和测试集
# Fix the RNG so the partition is reproducible.
set.seed(123)
# Stratified sample on the outcome: 70% of the rows go to the training set.
trainIndex <- createDataPartition(
  y = dat[["Class"]],
  p = 0.7,
  list = FALSE
)
# Rows not drawn for training form the held-out test set.
test <- dat[-trainIndex, ]
train <- dat[trainIndex, ]
3.训练模型
训练需要的时间有点长,所以把训练好的模型保存到本地文件:文件已存在时跳过训练,直接加载,这样模型只需训练一次。
# Control object for recursive feature elimination: 10-fold cross-validation
# using caret's caretFuncs helper set.
ctrl <- rfeControl(
  functions = caretFuncs,
  method = "cv",
  number = 10
)

# Fit the RFE model with a radial-kernel SVM. The fit is slow, so the result
# is cached on disk and the training step is skipped when the cache exists.
set.seed(123)
f <- "svmsonar.Rdata"
if (!file.exists(f)) {
  predictors <- train[, -ncol(train)]  # every column except the last (Class)
  svm_rfe_model <- rfe(
    x = predictors,
    y = train$Class,
    rfeControl = ctrl,
    method = "svmRadial"
  )
  save(svm_rfe_model, file = f)
}
# Always restore the model object from the cache file.
load(f)
4.查看模型选择的变量,计算C-index
看模型选中的变量有哪些
# Names of the predictors retained by the RFE procedure.
svm_rfe_model[["optVariables"]]
## [1] "V11" "V12" "V13" "V10" "V9" "V49" "V51" "V36" "V48" "V35" "V37" "V52"
## [13] "V47" "V5" "V4" "V21" "V46" "V43" "V14" "V8" "V1" "V20" "V22" "V6"
## [25] "V34" "V23" "V44" "V45" "V50" "V24" "V3" "V31" "V38" "V57" "V58" "V19"
## [37] "V25" "V53" "V42" "V28" "V33" "V27" "V26" "V2" "V32" "V59" "V7" "V54"
## [49] "V15" "V55" "V56" "V29" "V16" "V60" "V40" "V17" "V39" "V18" "V30" "V41"
计算C-index值
# Evaluate the model on the held-out test set with the C-index.
library(pROC)
library(Hmisc)
# Predict using the test predictors only (the last column is dropped).
svm_rfe_pred <- predict(svm_rfe_model, newdata = test[, -ncol(test)])
# rcorr.cens(x, S) takes the predictor first and the observed outcome second;
# passing them the other way round makes the "relevant pairs" depend on the
# predictions instead of the true classes.
# NOTE(review): as.numeric() assumes predict() returns a factor of class
# labels here (no class probabilities requested) -- confirm.
rcorr.cens(as.numeric(svm_rfe_pred), as.numeric(test$Class))
## C Index Dxy S.D. n missing
## 8.239583e-01 6.479167e-01 9.587715e-02 6.200000e+01 0.000000e+00
## uncensored Relevant Pairs Concordant Uncertain
## 6.200000e+01 1.920000e+03 1.582000e+03 0.000000e+00
网友评论