N1/N2 PFS

作者: 陈宇乔 | 来源:发表于2018-12-22 14:32 被阅读0次

    step1

    rm(list = ls()) 
    library(dplyr)
    library(tidyverse)
    
    TCGA.LUAD.GDC_phenotype <- read.delim("TCGA-LUAD.GDC_phenotype.tsv")
    
    #colnames(TCGA.LUAD.GDC_phenotype)
    #head(TCGA.LUAD.GDC_phenotype)
    
    LUAD_Pheno<- select(TCGA.LUAD.GDC_phenotype, "submitter_id.samples", "vital_status.diagnoses", "days_to_death.diagnoses", "days_to_last_follow_up.diagnoses", "pathologic_N", "pathologic_M", "days_to_new_tumor_event_after_initial_treatment")
    
    LUAD_Pheno<- LUAD_Pheno[grep('M0',LUAD_Pheno$pathologic_M),]  #####只筛选M0的
    #table(LUAD_Pheno$pathologic_N)
    LUAD_Pheno<- filter(LUAD_Pheno, pathologic_N == 'N1'| pathologic_N == 'N2')#####只筛选N1/2的
    
    
    LUAD_Pheno[is.na(LUAD_Pheno)]<- 0
    
    LUAD_Pheno$PFS_status<- ifelse((LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0 & LUAD_Pheno$days_to_death.diagnoses == 0), 0,1)
    LUAD_Pheno$OS<- ifelse(LUAD_Pheno$days_to_last_follow_up.diagnoses > LUAD_Pheno$days_to_death.diagnoses, LUAD_Pheno$days_to_last_follow_up.diagnoses,LUAD_Pheno$days_to_death.diagnoses)
    LUAD_Pheno$PFS<- ifelse(LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0, LUAD_Pheno$OS ,LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment)
    
    LUAD_Pheno<- LUAD_Pheno[grep('01A',LUAD_Pheno$submitter_id.samples),]  #####只筛选01A的   01A代表肿瘤
    
    TCGA.LUAD.htseq_counts <- read.delim("TCGA-LUAD.htseq_counts.tsv")
    Ensemble_ID<- TCGA.LUAD.htseq_counts[,1]
    Ensemble_ID2<- as.character(Ensemble_ID)
    ID<- strsplit(Ensemble_ID2, "[.]")
    str(ID)
    IDlast<- sapply(ID, "[", 1)
    TCGA.LUAD.htseq_counts<- cbind(IDlast,TCGA.LUAD.htseq_counts[,-1])
    save(TCGA.LUAD.GDC_phenotype,TCGA.LUAD.htseq_counts,LUAD_Pheno, file = 'TCGA.Rdata')
    
    
    

    step 2

    load(file = 'TCGA.Rdata')
    
    #################
    library(clusterProfiler)
    library(org.Hs.eg.db)
    keytypes(org.Hs.eg.db)
    
    # 采用bitr 命令进行ID的转换
    
    gene_ids <- bitr(TCGA.LUAD.htseq_counts$IDlast, fromType = "ENSEMBL", toType = c("SYMBOL", "GENENAME"), OrgDb="org.Hs.eg.db") 
    
    # 查看转换的结果
    
    # gene_ids
    ######################### 合并结果 merge后基因从6万个变成了2万个
    library(dplyr)
    TCGA.LUAD.htseq_counts<- rename(TCGA.LUAD.htseq_counts, c(IDlast="ENSEMBL"))
    TCGA.LUAD.htseq_counts<- merge(gene_ids, TCGA.LUAD.htseq_counts, by = "ENSEMBL")
    TCGA.LUAD.htseq_counts[1:10,1:10]
    
    ######################RNA-seq研究基因用逗号分隔###########
    # gene<- c("MPO IL6 CXCR2 SNAIL SH2D1A ADGRE5 CORO1A BST2 S100A8 FCGR3B ZEB1 KREMEN1 OAS3 VCAM1 PRDM1 PTGS2 ICOS TDO2 ITGAE CDKN3 CXCR4 MKI67 GZMB")
    # gene<- strsplit(gene,'\ ')
    # head(gene)
    # str(gene)
    # gene2<- sapply(gene, "[")
    # gene2<- as.character(gene2)
    
    # gene<- c('KREMEN1')  ###这个基因没找到,其ensemble ID: ENSG00000183762.11
    ################TCGA.LUAD.htseq_counts[grep('ENSG00000183762', TCGA.LUAD.htseq_counts$Ensembl_ID),]  #####
    ################grep的用法
    gene<- c("MPO","IL6",'SNAI1', "CXCR2","SH2D1A","ADGRE5","CORO1A", "BST2",  "S100A8" , "FCGR3B"  ,"ZEB1" , "OAS3" ,  "VCAM1"  , "PRDM1"  ,"PTGS2",   "ICOS" ,   "TDO2",    "ITGAE"  , "CDKN3" ,  "CXCR4",   "MKI67" ,  "GZMB")
    ## RNA_seq_data<-filter(TCGA.LUAD.htseq_counts, TCGA.LUAD.htseq_counts$SYMBOL == 'OAS3') #############查找一下有没有这个基因
    #############################生存曲线
    #######select only gene to analysis
    library(survminer)
    library(survival)
    library(ggplot2)
    for (x in gene) {
      RNA_seq_data<-filter(TCGA.LUAD.htseq_counts, TCGA.LUAD.htseq_counts$SYMBOL == x)
      RNA_seq_data<- t(RNA_seq_data)
      colnames(RNA_seq_data)
      RNA_seq_data<- as.data.frame(RNA_seq_data)
      # str(RNA_seq_data)
      # colnames(LUAD_Pheno)
      RNA_seq_data$submitter_id.samples<- row.names(RNA_seq_data)
      colnames(RNA_seq_data)<- c("Expressionvalue","submitter_id.samples")
      LUAD_Pheno$submitter_id.samples<- as.character(LUAD_Pheno$submitter_id.samples)
      LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
      LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
      LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
      LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
      finaldata<- inner_join(LUAD_Pheno,RNA_seq_data, by = "submitter_id.samples")
      finaldata$PFS_status<- as.character(finaldata$PFS_status)
      finaldata$PFS_status<- as.numeric(as.factor(finaldata$PFS_status))
      finaldata$Expressionvalue<- as.numeric(as.character(finaldata$Expressionvalue))
      finaldata$group<- ifelse(finaldata$Expressionvalue>median(finaldata$Expressionvalue),'high','low')
      library(survminer)
      library(survival)
      fit <- survfit(Surv(finaldata$PFS,finaldata$PFS_status)~finaldata$group, data=finaldata) 
      summary(fit)
      pp<- ggsurvplot(fit, data = finaldata, conf.int = F, pval = TRUE,
                      xlim = c(0,2000), # present narrower X axis, but not affect
                      # survival estimates. 
                      xlab = "Time in days", # customize X axis label. 
                      break.time.by = 200) # break X axis in time intervals by 500. 
      ggsave(filename = paste("plot_",x,".pdf",sep = ""))
      print(x)
    }
    

    相关文章

      网友评论

          本文标题:N1/N2 PFS

          本文链接:https://www.haomeiwen.com/subject/lpmikqtx.html