美文网首页简书付费文章TCGA GEOR语言收藏
TCGA—STAD免疫细胞浸润(CIBERSORT)

TCGA—STAD免疫细胞浸润(CIBERSORT)

作者: 养猪场小老板 | 来源:发表于2020-04-04 22:10 被阅读0次

    - #00 R包安装

    ~~~R

    # Package setup for the TCGA-STAD workflow.
    # Run this in a fresh R session rather than calling rm(list = ls()):
    # clearing the workspace from inside a script destroys the caller's
    # objects and still leaves attached packages and options in place.

    # Install each dependency only when it is missing, so re-running the
    # script does not download and rebuild TCGAbiolinks every time.
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }

    if (!requireNamespace("TCGAbiolinks", quietly = TRUE)) {
      BiocManager::install("TCGAbiolinks")
    }

    library(TCGAbiolinks)

    ~~~

    ##1.1、临床数据下载和整理##

    ~~~R

    # Build the GDC project identifier, "TCGA-STAD".
    cancer_type <- paste("TCGA", "STAD", sep = "-")
    print(cancer_type)

    # Clinical data, option 1: query GDC through TCGAbiolinks.
    clinical <- GDCquery_clinic(project = cancer_type, type = "clinical")

    # Build the file name once and use it for both the write and the
    # read-back. With sep = "-" the file on disk is "TCGA-STAD-clinical.csv",
    # so reading a hard-coded "TCGA_STAD_clinical.csv" would not find it.
    clinical_csv <- paste(cancer_type, "clinical.csv", sep = "-")
    write.csv(clinical, file = clinical_csv)

    cl_df1 <- read.csv(clinical_csv, header = TRUE)
    # View(cl_df1)

    # Clinical data, option 2 (recommended by the author because the clinical
    # files are small): download the clinical XML files from the GDC portal
    # cart, then assemble them into a single table.
    # Run this with the working directory set to the folder that holds the
    # downloaded XML files.
    library("XML")
    library("methods")

    # list.files() takes a regular expression, not a shell glob, so match a
    # literal ".xml" suffix. Sub-directories are searched as well.
    all_fiels <- list.files(path = "./", pattern = "\\.xml$", recursive = TRUE)
    # head(all_fiels)

    # Fail early with a clear message instead of an obscure error from
    # t(do.call(cbind, list())) further down.
    if (length(all_fiels) == 0) {
      stop("No .xml files found under ", getwd(), call. = FALSE)
    }

    # Parse every XML file into a transposed per-file table.
    cl <- lapply(all_fiels, function(x) {
      # x <- all_fiels[1]
      result <- xmlParse(file = file.path("./", x))
      rootnode <- xmlRoot(result)
      # Per the original author's note: the root has two child nodes and the
      # second one stores the patient information.
      xmldataframe <- xmlToDataFrame(rootnode[2])
      t(xmldataframe)
    })

    # Bind the per-file columns side by side, transpose back so each file is
    # a row, and drop duplicated rows.
    cl_df <- unique(t(do.call(cbind, cl)))
    # View(cl_df)

    # Author's note: preferably keep this file in the parent folder.
    save(cl_df, file = "TCGA_STAD_clinical_df.Rdata")
    load(file = "TCGA_STAD_clinical_df.Rdata")
    # write.csv(cl_df, file = 'TCGA_STAD_clinical_df.csv')
    # write.table(cl_df, file = 'TCGA_STAD_clinical_df.txt')  # recommended

    ~~~

    ##1.2、临床数据整理##

    ~~~R

    # Inspect the available clinical columns before selecting by position.
    colnames(cl_df)

    # Keep eight columns by position.
    # NOTE(review): these indices depend on the column order of cl_df; check
    # them against the colnames() output above whenever the input changes.
    cl_df_select <- as.data.frame(cl_df[, c(5, 6, 8, 9, 11, 12, 37, 38)])
    # write.csv(cl_df_select, file = "cl_df_select.csv")
    # cl_df_select <- read.csv(file = "cl_df_select.csv", header = T)
    # View(cl_df_select)

    # Split stage_event into stage / T / N / M.
    # Every call is written as tidyr::separate() on a named intermediate, so
    # the block needs neither the %>% operator nor an attached tidyr.
    # Splitting on "T" and then on "N" leaves "<N value>M<M value>", so the
    # last split yields N first and M second; the columns are named in that
    # order so the labels match the values they hold.
    cl_df_select_new <- tidyr::separate(
      cl_df_select, stage_event,
      into = c("stage", "TNM"), sep = "T"
    )
    cl_df_select_new <- tidyr::separate(
      cl_df_select_new, TNM,
      into = c("T", "NM"), sep = "N"
    )
    cl_df_select_new <- tidyr::separate(
      cl_df_select_new, NM,
      into = c("N", "M"), sep = "M"
    )

    # Drop the two records whose TNM staging is unclear (author's note).
    # NOTE(review): rows 68 and 389 are tied to this exact download; confirm
    # the positions before reusing this on refreshed data.
    cl_df_select_new <- cl_df_select_new[-c(68, 389), ]
    # View(cl_df_select_new)

    #View(cl_df_select_new)

    ##删除

    {

    cl_df_select_new<- cl_df_select_new[!cl_df_select_new[,7]=="7th",]

    cl_df_select_new<- cl_df_select_new[!cl_df_select_new[,7]=="6th",]

    cl_df_select_new<-tidyr::separate(cl_df_select_new,stage,into = c("th","stage"),sep="h")

    cl_df_select_new<-cl_df_select_new

    相关文章

      网友评论

        本文标题:TCGA—STAD免疫细胞浸润(CIBERSORT)

        本文链接:https://www.haomeiwen.com/subject/qkzsphtx.html