# 首先利用compareGroups包绘制基线信息表并进行单因素逻辑回归,然后利用forestmodel包进行多因素逻辑回归并绘制森林图,最后应用randomForest包构建随机森林并进行验证,验证指标主要为C指数。
library(ggpubr)
library(survminer)
library(survival)
library(randomForest)
library(compareGroups)
library(rms)
library(forestmodel)
library(pROC)
# Load the raw data; "rawdata.csv" is resolved against the working directory.
df <- read.csv("rawdata.csv", header = TRUE)

# Recode the categorical columns as factors. factor() attaches the labels to
# the sorted distinct values of each column, in that order.
# NOTE(review): assumes each two-label column holds exactly two distinct codes
# and that the smaller code means the first label - confirm against the data.
df$status <- factor(df$status, labels = c("alive", "dead")) # survival status
df$sex <- factor(df$sex, labels = c("male", "female")) # sex
df$obstruct <- factor(df$obstruct, labels = c("no", "yes")) # obstruction
df$differ <- as.factor(df$differ) # keeps the original codes as level names
# Fix the RNG seed so the random split below is reproducible.
set.seed(2021)

# Draw a group label (1 or 2) for every row, independently, with
# probabilities 0.7 and 0.3. Because each row is drawn separately, the
# resulting split is only approximately 70% / 30%.
ind <- sample(2, nrow(df), replace = TRUE, prob = c(0.7, 0.3))

devData <- df[ind == 1, ] # development (model-building) set
vadData <- df[ind == 2, ] # validation set
# Full data set with the split indicator `ind` prepended as the first column.
mixdata <- cbind(ind, df)

# Table 1: describe every variable by split group (ind = 1 vs ind = 2).
# method = 1 asks compareGroups to treat all continuous variables as
# normally distributed.
tab1 <- descrTable(
  ind ~ . - ind,
  data = mixdata,
  method = 1
)
export2word(tab1, file = "table1.docx")

# Table 2: describe every variable by survival status, with ratios and
# p-for-trend shown; `time` alone is analysed with method 2 (non-normal).
tab2_uni <- descrTable(
  status ~ .,
  data = df,
  method = c(time = 2),
  show.ratio = TRUE,
  show.p.trend = TRUE
)
export2word(tab2_uni, file = "tab2_uni.docx")
## Multivariable logistic regression, shown as a forest plot
# The model is fitted on the development set only.
glmmodel <- glm(
  formula = status ~ rx + sex + obstruct + differ,
  family = binomial(link = "logit"),
  data = devData
)
forest_model(glmmodel)
#### Evaluate the logistic regression model and compute its C-index (AUC)
str(df$status)

# The model was fitted on devData, so it must be scored against devData's
# outcome. Pairing these predictions with df$status (the full data set) gives
# vectors of different lengths. Passing newdata explicitly returns one
# prediction per devData row (NA where a predictor is missing), so the
# predictions stay aligned with devData$status; roc() drops the NA pairs by
# default.
pr <- predict(glmmodel, newdata = devData, type = "response")
roccurve <- roc(response = devData$status, predictor = pr)

# For a binary outcome the C-index equals the area under the ROC curve.
print(auc(roccurve))
plot.roc(roccurve, xlim = c(1, 0), ylim = c(0, 1))
# 网友评论