代码如下:
options(stringsAsFactors = F)
library(survival)
library(xlsx)
library(plyr)
rm(list = ls())
source("http://bioconductor.org/biocLite.R")
#=========处理临床数据============
patientMatch$Smoke<- gsub("Unknown", NA, patientMatch$Smoke)
patientMatch <- read.csv("patientMatch.txt", header = T, row.names = 1)
table(as.numeric(patientMatch$Days2End) >= 30)
patientMatch <- subset(patientMatch, as.numeric(patientMatch$Days2End) >= 30)
days <- function(x, y){
if(is.na(x))return(y)
else return(x)
}
patientMatch$Mons2End <- as.numeric(mapply(days, patientMatch$Days2Death,
patientMatch$Days2LastFol))/30
patientMatch <- patientMatch[!is.na(patientMatch$Days2End),]
head(patientMatch)
dim(patientMatch)
#得到490个病人满足条件
baseLine <- patientMatch[, c(2,3,4,5)]
patientMatch$AgeLog[patientMatch$Age > 55] <- ">55"
patientMatch$AgeLog[patientMatch$Age <= 55] <- "<=55"
patientMatch$Gender <- factor(patientMatch$Gender)
#数据转换称因子变量分组并储存
Freq <- lapply(patientMatch[,c(2:21, 26)], table)
baseLine$Age <- factor(ifelse(patientMatch$Age > 55,">55", "<=55"),
levels = c("<=55", ">55"))
baseLine$Smoke <- factor(patientMatch$Smoke, ordered = T)
baseLine$Acohol <- factor(patientMatch$Alcohol)
patientMatch$PathoStage[patientMatch$PathoStage == "Discrepancy"] <- NA
baseLine$PathoStage <- factor(gsub("[ABC]", "", patientMatch$PathoStage),
levels = c("Stage I", "Stage II", "Stage III",
"Stage IV"))
baseLine$PathoT <- factor(gsub("[ab]", "", patientMatch$PathoT),
levels = c("T0", "T1", "T2", "T3", "T4"))
baseLine$PathoN <- factor(gsub("[abc]", "", patientMatch$PathoN),
levels = c("N0", "N1", "N2", "N3"))
baseLine$PathoM <- factor(patientMatch$PathoM,levels = c("M0", "M1"))
baseLine$ClinStage <- factor(gsub("[ABC]", "", patientMatch$ClinStage),
levels = c("Stage I", "Stage II", "Stage III",
"Stage IV"))
baseLine$ClinT <- factor(gsub("[ab]", "", patientMatch$ClinT),
levels = c("T0", "T1", "T2", "T3", "T4"))
baseLine$ClinN <- factor(gsub("[abc]", "", patientMatch$ClinN),
levels = c("N0", "N1", "N2", "N3"))
baseLine$ClinM <- factor(patientMatch$ClinM,levels = c("M0", "M1"))
baseLine$HistoGrade <- factor(patientMatch$HistoGrade,
levels = c("G1", "G2", "G3", "G4"))
baseLine$LymphVasInvas <- factor(patientMatch$LymphVascInvas)
baseLine$MarginStatus <- factor(patientMatch$MarginStatus,
levels = c("Negative", "Close", "Positive"))
baseLine$NodeExtraCapsu <- factor(patientMatch$NodeExtraCapsu,
levels = c("No Extranodal Extension",
"Microscopic Extension",
"Gross Extension"))
baseLine$Mons2End <- patientMatch$Mons2End
baseLine$VitalStatus <- factor(patientMatch$VitalStatus)
write.csv(baseLine, "CoxbaseLine.csv")
dim(baseLine)
#========基数整理===========
base <- patientMatch[, c(2,3,4,5)]
base$Age <- ifelse(patientMatch$Age > 55,">55", "<=55")
base$Smoke <- patientMatch$Smoke
base$Acohol <- patientMatch$Alcohol
base$PathoStage <- gsub("[ABC]", "", patientMatch$PathoStage)
base$PathoT <- gsub("[ab]", "", patientMatch$PathoT)
base$PathoN <- gsub("[abc]", "", patientMatch$PathoN)
base$PathoM <- patientMatch$PathoM
base$ClinStage <- gsub("[ABC]", "", patientMatch$ClinStage)
base$ClinT <- gsub("[ab]", "", patientMatch$ClinT)
base$ClinN <- gsub("[abc]", "", patientMatch$ClinN)
base$ClinM <- patientMatch$ClinM
base$HistoGrade <- patientMatch$HistoGrade
base$LymphVasInvas <- patientMatch$LymphVascInvas
base$MarginStatus <- patientMatch$MarginStatus
base$NodeExtraCapsu <- patientMatch$NodeExtraCapsu
base$Mons2End <- patientMatch$Mons2End
base$VitalStatus <- factor(patientMatch$VitalStatus)
#整理成基线表
head(base)
write.csv(base, "baseLine0factors.csv")
Freq <- lapply(base[, 1:19], table)
prop <- lapply(Freq[1:19], prop.table)
#===========建表==============
# Character <- c(names(Freq[3]), names(Freq[[3]]))
# Noc <- c(NA, paste0(Freq[[3]]))
# patientChara <- data.frame("Characteritics" = Character, "Number of Case" = Noc)
# Char <- rbind(Char, patientChara)
Char <- NULL
for(i in 1:19){
Character <- c(names(Freq[i]), names(Freq[[i]]))
Noc <- c(NA, paste0(Freq[[i]], "(", round(prop[[i]]*100, 2),")"))
patientChara <- data.frame("Characteristics" = Character, "Number of Case" = Noc)
Char <- rbind(Char, patientChara)
}
write.xlsx(Char, "BaseLineIntergarted.xls", row.names = F, showNA = F)
#==========cox生存分析=========
head(baseLine)
baseLine$Gender <- factor(baseLine$Gender)
baseLine0 <- baseLine[, c(1, 4:21)]
baseLine0$Laterility <- factor(baseLine0$Laterility)
#转换数据
baseLine0[, c(1:17)] <- lapply(baseLine0[, c(1:17)], as.numeric)
#单因素Cox回归
BaSurv <- Surv(time = baseLine0$Mons2End,
event = ifelse(baseLine0$VitalStatus == 'Alive', 0,1))
UniCox <- function(x){
FML <- as.formula(paste0("BaSurv~", x))
LCox <- coxph(FML, data = baseLine0)
Lsum <- summary(LCox)
HR <- round(Lsum$coefficients[, 2], 2)
PValue <- round(Lsum$coefficients[, 5], 3)
CI <- paste0(round(Lsum$conf.int[, 3:4], 2), collapse = "-")
unicox <- data.frame("Characteristics" = x,
"Hazard Ratio" = HR,
"CI95" = CI,
"P Value" = PValue)
return(unicox)
}
UniCox("NodeExtraCapsu")
VarNames <- colnames(baseLine0)[1:17]
UniVar <- lapply(VarNames, UniCox)
UniVar <- ldply(UniVar, data.frame)
UniVar$Characteristics[UniVar$P.Value < 0.05]
#========多因素回归=======
fml <- as.formula(paste0("BaSurv~", paste0(UniVar$Characteristics[UniVar$P.Value < 0.05][-4],
collapse = "+")))
MultiCox <- coxph(fml, data = baseLine0)
MultiName <- as.character(UniVar$Characteristics[UniVar$P.Value < 0.05][-4])
Multisum <- summary(MultiCox)
MHR <- round(Multisum$coefficients[, 2], 2)
MPValue <- round(Multisum$coefficients[, 5], 3)
MCIL <- paste0(round(Multisum$conf.int[, 3], 2))
MCIU <- paste0(round(Multisum$conf.int[, 4], 2))
MCI <- paste0(MCIL, "-", MCIU)
Multicox <- data.frame("Characteristics" = MultiName,
"Hazard Ratio" = MHR,
"CI95" = MCI,
"P Value" = PValue)
Final <- merge.data.frame(UniVar, Multicox, by = "Characteristics", all = T, sort = T)
write.xlsx(Final, "FinalCox.xls", row.names = F, showNA = F)
改天来补坑
网友评论