生存分析

作者: 医只蜗牛 | 来源:发表于2021-07-23 20:53 被阅读0次

    https://www.bbsmax.com/A/8Bz8GPXVdx/ 有关phenotype各列的含义

    TCGA的28篇教程- 对TCGA数据库的任意癌症中任意基因做生存分析

    CSDN :tcga压缩包提取合并_单基因生信分析流程(1)一文解决TCGA数据下载整理问题

    以上两个最终得到的是一样的结果。后面的KM分析需要进一步看看。

    【TCGAbiolinks】可以用

    TCGAbiolinks下载的生存分析图片

    从表格可以看出比较详细。

    【UCSCXenaTools】下载的比较简单。

    image.png

    这个下载的是官网的处理好的表格,里面的内容比较简单。
    是直接处理好的。跟网上下载的不一样。

    对比以上两张图看出不同。

    需要注意的是,两个包下载的东西不一样,不可混淆。一个是clinical,UCSCXenaTools是下载phenotype数据再分析,且,最好官网下载,UCSCXenaTools下载的读取可能出现错误。【Rdata直接加载结果与解压后加载的不一样】

    library("UCSCXenaTools")
    phenotype<-XenaGenerate(subset = XenaCohorts =="GDC TCGA Kidney Clear Cell Carcinoma (KIRC)")%>% 
      XenaFilter(filterDatasets    = "TCGA-KIRC.GDC_phenotype.tsv") %>% 
      XenaQuery() %>%
      XenaDownload() %>% 
      XenaPrepare()   #加载数据
    
    head(phenotype)
    save(phenotype,file = "TCGA-KIRC.GDC_phenotype_file.tsv")
    save(phenotype,file = "TCGA-KIRC.GDC_phenotype_file.Rdata")
    
    ###前面第一行读取的是手动下载的。用包下载的读取会出现错误。所以直接跳过
    
    kirc.phenotype <- read.csv("TCGA-KIRC.GDC_phenotype.tsv", header = T, sep = "\t")
    # 提取肿瘤的表型信息
    #kirc.phenotype = phenotype
    tumor.sample <- sapply(as.character(kirc.phenotype$submitter_id.samples), function(x){
      number <- as.numeric(unlist(strsplit(unlist(strsplit(as.character(x), split="-"))[4], split = "[A-Z]"))[1])
      if(number<=9){
        id <- as.character(x)
        return(id)
      }
    })
    tumor.sample.id <- unlist(tumor.sample)
    tumor.kirc.phenotype <- kirc.phenotype[match(tumor.sample.id, kirc.phenotype$submitter_id.samples), ]
    rownames(tumor.kirc.phenotype) <- tumor.kirc.phenotype$submitter_id.samples
    # 获取唯一的肿瘤样本以及表型矩阵
    uniq.sample.id <- unique(tumor.kirc.phenotype$submitter_id)
    uniq.tumor.kirc.phenotype <- tumor.kirc.phenotype[match(uniq.sample.id, tumor.kirc.phenotype$submitter_id), ]
    rownames(uniq.tumor.kirc.phenotype) <- uniq.tumor.kirc.phenotype$submitter_id
    # 获取OS time
    os.time <- rep(0, nrow(uniq.tumor.kirc.phenotype))
    dead.index <- which(uniq.tumor.kirc.phenotype$days_to_death > 0)
    alive.index <- which(uniq.tumor.kirc.phenotype$days_to_last_follow_up.diagnoses > 0)
    os.time[dead.index] <- uniq.tumor.kirc.phenotype$days_to_death[dead.index]
    os.time[alive.index] <- uniq.tumor.kirc.phenotype$days_to_last_follow_up.diagnoses[alive.index]
    # 获取终点事件,并用0,1表示
    os.index <- which(os.time > 0)
    os.time <- os.time[os.index]
    status <- rep(0, nrow(uniq.tumor.kirc.phenotype))
    dead.index <- which(uniq.tumor.kirc.phenotype$vital_status.demographic == "Dead")
    alive.index <- which(uniq.tumor.kirc.phenotype$vital_status.demographic == "Alive")
    status[dead.index] <- 0
    status[alive.index] <- 1
    status <- status[os.index]
    uniq.tumor.kirc.phenotype <- uniq.tumor.kirc.phenotype[os.index, ]
    # 提取感兴趣的表型信息
    interesting.tumor.kirc.data <- data.frame(gender = uniq.tumor.kirc.phenotype$gender.demographic,
                                              age = uniq.tumor.kirc.phenotype$age_at_initial_pathologic_diagnosis,
                                              status = status,
                                              OS = os.time)
    rownames(interesting.tumor.kirc.data) <- rownames(uniq.tumor.kirc.phenotype)
    
    
    

    tcga压缩包提取合并_单基因生信分析流程(1)一文解决TCGA数据下载整理问题
    这里面为什么有clinical.info = "patient"的原因,详见:https://www.jianshu.com/p/dc253f1713d5 引用部分

    --------------------------分隔符-------------------------

    几个生存分析可以一起看看,估计差不多的意思。
    tcga压缩包提取合并_单基因生信分析流程(1)一文解决TCGA数据下载整理问题
    TCGA的28篇教程- 对TCGA数据库的任意癌症中任意基因做生存分析
    TCGA的28篇教程-整理GDC下载的xml格式的临床资料

    -------------------------分隔符--------------------------

    【clinical下载比较】

    官网手动下载:加入cart,download xml文件。然后读取。

    # Load the packages required to read XML files.
    library("XML")
    library("methods")
    getwd()
    dir='D:/R_code/follow_practice/xuetu_GEO_follow/week_practise/02_follow/02_02_KM_/gdc_download_20210723_163311.823442/'
      
    all_fiels=list.files(path = dir ,pattern='*.xml$',recursive=T)
    cl = lapply(all_fiels
                , function(x){
                  #x=all_fiels[1]
                  result <- xmlParse(file = file.path(dir,x)) 
                  rootnode <- xmlRoot(result)  
                  xmldataframe <- xmlToDataFrame( rootnode[2] ) 
                  return(t(xmldataframe))
                })
    cl_df <- t(do.call(cbind,cl))
    save(cl_df,file = 'GDC_TCGA_KIRC_clinical_df.Rdata')
    
    

    【TCGAbiolinks下载】

    一顿操作后

    ##TCGAbiolinks下载:
    library(TCGAbiolinks)
    library(dplyr)
    library(DT)
    library(SummarizedExperiment)
    
    
    #下面填入要下载的癌症种类
    request_cancer=c("KIRC")
    for (i in request_cancer) {
      cancer_type=paste("TCGA",i,sep="-")
      print(cancer_type)
      #下载临床数据
      clinical <- GDCquery_clinic(project = cancer_type, type = "clinical")
      write.csv(clinical,file = paste(cancer_type,"clinical.csv",sep = "-"))
    }  
    
    
    library(TCGAbiolinks)
    library(SummarizedExperiment)
    library(dplyr)
    library(DT)
    library(reshape2) 
    
    clinical0 <- GDCquery_clinic(project = "TCGA-KIRC", type = "clinical")
    
    ## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
    datatable(clinical, filter = 'top', 
              options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
              rownames = FALSE)
    rm(list=ls())
    query <- GDCquery(project = "TCGA-KIRC", 
                      data.category = "Clinical", 
                      file.type = "xml")
    GDCdownload(query)
    clinical000 <- GDCprepare_clinic(query, clinical.info = "patient")
    

    最终结果:


    image.png
    image.png

    比较,cl_df<clinical000<clinical=clinical0;
    且几个为包含关系。

    【对比phenotype和clinical】

    差不多,但是命名规则略有不同。

    相关文章

      网友评论

        本文标题:生存分析

        本文链接:https://www.haomeiwen.com/subject/xdvpmltx.html