美文网首页TCGA数据分析
生存分析cox回归

生存分析cox回归

作者: Stone_Stan4d | 来源:发表于2018-02-28 13:10 被阅读385次

代码如下:

options(stringsAsFactors = F)
library(survival)
library(xlsx)
library(plyr)
rm(list = ls())

source("http://bioconductor.org/biocLite.R")

#=========处理临床数据============
patientMatch$Smoke<- gsub("Unknown", NA, patientMatch$Smoke)
patientMatch <- read.csv("patientMatch.txt", header = T, row.names = 1)
table(as.numeric(patientMatch$Days2End) >= 30)
patientMatch <- subset(patientMatch, as.numeric(patientMatch$Days2End) >= 30)
days <- function(x, y){
  if(is.na(x))return(y)
  else return(x)
}
patientMatch$Mons2End <- as.numeric(mapply(days, patientMatch$Days2Death, 
                                           patientMatch$Days2LastFol))/30
patientMatch <- patientMatch[!is.na(patientMatch$Days2End),]
head(patientMatch)
dim(patientMatch)
#得到490个病人满足条件
baseLine <- patientMatch[, c(2,3,4,5)]
patientMatch$AgeLog[patientMatch$Age > 55] <- ">55"
patientMatch$AgeLog[patientMatch$Age <= 55] <- "<=55"
patientMatch$Gender <- factor(patientMatch$Gender)

#数据转换称因子变量分组并储存
Freq <- lapply(patientMatch[,c(2:21, 26)], table)

baseLine$Age <- factor(ifelse(patientMatch$Age > 55,">55", "<=55"), 
                       levels = c("<=55", ">55"))
baseLine$Smoke <- factor(patientMatch$Smoke, ordered = T)
baseLine$Acohol <- factor(patientMatch$Alcohol)
patientMatch$PathoStage[patientMatch$PathoStage == "Discrepancy"] <- NA
baseLine$PathoStage <- factor(gsub("[ABC]", "", patientMatch$PathoStage), 
                              levels = c("Stage I", "Stage II", "Stage III", 
                                         "Stage IV"))
baseLine$PathoT <- factor(gsub("[ab]", "", patientMatch$PathoT), 
                          levels = c("T0", "T1", "T2", "T3", "T4"))
baseLine$PathoN <- factor(gsub("[abc]", "", patientMatch$PathoN), 
                          levels = c("N0", "N1", "N2", "N3"))
baseLine$PathoM <- factor(patientMatch$PathoM,levels = c("M0", "M1"))
baseLine$ClinStage <- factor(gsub("[ABC]", "", patientMatch$ClinStage), 
                             levels = c("Stage I", "Stage II", "Stage III", 
                                        "Stage IV"))
baseLine$ClinT <- factor(gsub("[ab]", "", patientMatch$ClinT), 
                         levels = c("T0", "T1", "T2", "T3", "T4"))
baseLine$ClinN <- factor(gsub("[abc]", "", patientMatch$ClinN), 
                         levels = c("N0", "N1", "N2", "N3"))
baseLine$ClinM <- factor(patientMatch$ClinM,levels = c("M0", "M1"))
baseLine$HistoGrade <- factor(patientMatch$HistoGrade, 
                              levels = c("G1", "G2", "G3", "G4"))
baseLine$LymphVasInvas <- factor(patientMatch$LymphVascInvas)
baseLine$MarginStatus <- factor(patientMatch$MarginStatus,
                                levels = c("Negative", "Close", "Positive"))
baseLine$NodeExtraCapsu <- factor(patientMatch$NodeExtraCapsu, 
                                  levels = c("No Extranodal Extension", 
                                             "Microscopic Extension",
                                             "Gross Extension"))
baseLine$Mons2End <- patientMatch$Mons2End
baseLine$VitalStatus <- factor(patientMatch$VitalStatus)
write.csv(baseLine, "CoxbaseLine.csv")
dim(baseLine)

#========基数整理===========
base <- patientMatch[, c(2,3,4,5)]
base$Age <- ifelse(patientMatch$Age > 55,">55", "<=55")
base$Smoke <- patientMatch$Smoke
base$Acohol <- patientMatch$Alcohol
base$PathoStage <- gsub("[ABC]", "", patientMatch$PathoStage)
base$PathoT <- gsub("[ab]", "", patientMatch$PathoT)
base$PathoN <- gsub("[abc]", "", patientMatch$PathoN)
base$PathoM <- patientMatch$PathoM
base$ClinStage <- gsub("[ABC]", "", patientMatch$ClinStage)
base$ClinT <- gsub("[ab]", "", patientMatch$ClinT)
base$ClinN <- gsub("[abc]", "", patientMatch$ClinN)
base$ClinM <- patientMatch$ClinM
base$HistoGrade <- patientMatch$HistoGrade
base$LymphVasInvas <- patientMatch$LymphVascInvas
base$MarginStatus <- patientMatch$MarginStatus
base$NodeExtraCapsu <- patientMatch$NodeExtraCapsu
base$Mons2End <- patientMatch$Mons2End
base$VitalStatus <- factor(patientMatch$VitalStatus)

#整理成基线表
head(base)
write.csv(base, "baseLine0factors.csv")
Freq <- lapply(base[, 1:19], table)
prop <- lapply(Freq[1:19], prop.table)
#===========建表==============

# Character <- c(names(Freq[3]), names(Freq[[3]]))
# Noc <- c(NA, paste0(Freq[[3]]))
# patientChara <- data.frame("Characteritics" = Character, "Number of Case" = Noc)
# Char <- rbind(Char, patientChara)
Char <- NULL
for(i in 1:19){
  Character <- c(names(Freq[i]), names(Freq[[i]]))
  Noc <- c(NA, paste0(Freq[[i]], "(", round(prop[[i]]*100, 2),")"))
  patientChara <- data.frame("Characteristics" = Character, "Number of Case" = Noc)
  Char <- rbind(Char, patientChara)
}

write.xlsx(Char, "BaseLineIntergarted.xls", row.names = F, showNA = F)

#==========cox生存分析=========
head(baseLine)
baseLine$Gender <- factor(baseLine$Gender)
baseLine0 <- baseLine[, c(1, 4:21)]
baseLine0$Laterility <- factor(baseLine0$Laterility)

#转换数据
baseLine0[, c(1:17)] <- lapply(baseLine0[, c(1:17)], as.numeric)

#单因素Cox回归
BaSurv <- Surv(time = baseLine0$Mons2End, 
               event = ifelse(baseLine0$VitalStatus == 'Alive', 0,1))

UniCox <- function(x){
  FML <- as.formula(paste0("BaSurv~", x))
  LCox <- coxph(FML, data = baseLine0)
  Lsum <- summary(LCox)
  HR <- round(Lsum$coefficients[, 2], 2)
  PValue <- round(Lsum$coefficients[, 5], 3)
  CI <- paste0(round(Lsum$conf.int[, 3:4], 2), collapse = "-")
  unicox <- data.frame("Characteristics" = x,
                       "Hazard Ratio" = HR,
                       "CI95" = CI,
                       "P Value" = PValue)
  return(unicox)
}
UniCox("NodeExtraCapsu")
VarNames <- colnames(baseLine0)[1:17]
UniVar <- lapply(VarNames, UniCox)
UniVar <- ldply(UniVar, data.frame)
UniVar$Characteristics[UniVar$P.Value < 0.05]

#========多因素回归=======
fml <- as.formula(paste0("BaSurv~", paste0(UniVar$Characteristics[UniVar$P.Value < 0.05][-4], 
                         collapse = "+"))) 
MultiCox <- coxph(fml, data = baseLine0)

MultiName <- as.character(UniVar$Characteristics[UniVar$P.Value < 0.05][-4])
Multisum <- summary(MultiCox)
MHR <- round(Multisum$coefficients[, 2], 2)
MPValue <- round(Multisum$coefficients[, 5], 3)
MCIL <- paste0(round(Multisum$conf.int[, 3], 2))
MCIU <- paste0(round(Multisum$conf.int[, 4], 2))
MCI <- paste0(MCIL, "-", MCIU)
Multicox <- data.frame("Characteristics" = MultiName,
                       "Hazard Ratio" = MHR,
                       "CI95" = MCI,
                       "P Value" = PValue)

Final <- merge.data.frame(UniVar, Multicox, by = "Characteristics", all = T, sort = T)
write.xlsx(Final, "FinalCox.xls", row.names = F, showNA = F)

改天来补坑

相关文章

网友评论

    本文标题:生存分析cox回归

    本文链接:https://www.haomeiwen.com/subject/uwmbxftx.html