美文网首页R甲可
Bioconductor --- TCGA Workflow

Bioconductor --- TCGA Workflow

作者: 林枫bioinfo | 来源:发表于2020-03-16 15:41 被阅读0次

    今天介绍生信平台Bioconductor上的TCGA Workflow,首先是R包的安装

    if (!"BiocManager" %in% rownames(installed.packages())) 
         install.packages("BiocManager")
    BiocManager::install("TCGAWorkflow")
    

    然后载入需要的R包,前者是各种示例数据集的封装,后者用于可视化

    library(TCGAWorkflowData)
    library(DT)
    

    整体上我只是大致地跑一下,真正用到自己的科研项目上各函数各参数还要再细致雕琢。Bioconductor包的特点就是充满了S4对象,如果想流畅操作Bioconductor上的各种R包,对于S4型面向对象编程的深入理解是必不可少的。

    1. Access to the data

    1.1 Downloading data from TCGA data portal

    R包TCGAbiolinks有3个主函数,GDCquery(), GDCdownload(), GDCprepare(),分别用于查询,下载和将数据作为R对象加载。

    R包TCGAbiolinks下载 GDC Legacy Archive 中的甲基化数据和基因表达数据

    library(TCGAbiolinks)
    library(SummarizedExperiment)
    # Obs: The data in the legacy database has been aligned to hg19
    query.met.gbm <- GDCquery(project = "TCGA-GBM", 
                              legacy = TRUE,
                              data.category = "DNA methylation",
                              platform = "Illumina Human Methylation 450", 
                              barcode = c("TCGA-76-4926-01B-01D-1481-05", "TCGA-28-5211-01C-11D-1844-05"))
    GDCdownload(query.met.gbm)
    met.gbm.450 <- GDCprepare(query = query.met.gbm,
                              save = TRUE, 
                              save.filename = "gbmDNAmet450k.rda",
                              summarizedExperiment = TRUE)
    
    query.met.lgg <- GDCquery(project = "TCGA-LGG", 
                              legacy = TRUE,
                              data.category = "DNA methylation",
                              platform = "Illumina Human Methylation 450",
                              barcode = c("TCGA-HT-7879-01A-11D-2399-05", "TCGA-HT-8113-01A-11D-2399-05"))
    GDCdownload(query.met.lgg)
    met.lgg.450 <- GDCprepare(query = query.met.lgg,
                              save = TRUE, 
                              save.filename = "lggDNAmet450k.rda",
                              summarizedExperiment = TRUE)
                              
    met.gbm.lgg <- cbind(assay(met.lgg.450), assay(met.gbm.450))
    
    
    query.exp.lgg <- GDCquery(project = "TCGA-LGG", 
                              legacy = TRUE,
                              data.category = "Gene expression",
                              data.type = "Gene expression quantification",
                              platform = "Illumina HiSeq", 
                              file.type = "results",
                              sample.type = "Primary solid Tumor")
    GDCdownload(query.exp.lgg)
    exp.lgg <- GDCprepare(query = query.exp.lgg, save = TRUE, save.filename = "lggExp.rda")
    
    query.exp.gbm <- GDCquery(project = "TCGA-GBM", 
                              legacy = TRUE,
                              data.category = "Gene expression",
                              data.type = "Gene expression quantification",
                              platform = "Illumina HiSeq", 
                              file.type = "results",
                              sample.type = "Primary solid Tumor")
    GDCdownload(query.exp.gbm)
    exp.gbm <- GDCprepare(query = query.exp.gbm, save = TRUE, save.filename = "gbmExp.rda")
              
    exp.gbm.lgg <- cbind(assay(exp.lgg), assay(exp.gbm))
    

    R包TCGAbiolinks下载 GDC data portal 中的拷贝数变异数据

    # Data.category: Copy number variation aligned to hg38
    query <- GDCquery(project = "TCGA-ACC",
                      data.category = "Copy Number Variation",
                      data.type = "Copy Number Segment",
                      barcode = c( "TCGA-OR-A5KU-01A-11D-A29H-01", "TCGA-OR-A5JK-01A-11D-A29H-01"))
    GDCdownload(query)
    data <- GDCprepare(query)
    
    query <- GDCquery("TCGA-ACC",
                      "Copy Number Variation",
                      data.type = "Masked Copy Number Segment",
                      sample.type = c("Primary solid Tumor")) # see the barcodes with getResults(query)$cases
    GDCdownload(query)
    data <- GDCprepare(query)
    

    R包TCGAbiolinks下载临床数据,有两种方法,第一种方法是使用函数GDCquery_clinical()下载indexed GDC clinical data,第二种方法将下载包含患者所有临床数据的xml文件,并从中检索所需的信息。

    The first one will download only the indexed GDC clinical data which includes diagnoses (vital status, days to death, age at diagnosis, days to last follow up, days to recurrence), treatments (days to treatment, treatment id, therapeutic agents, treatment intent type), demographic (gender, race, ethnicity) and exposures (cigarettes per day, weight, height, alcohol history) information.

    # get indexed clinical patient data for GBM samples
    gbm_clin <- GDCquery_clinic(project = "TCGA-GBM", type = "Clinical")
    
    # get indexed clinical patient data for LGG samples
    lgg_clin <- GDCquery_clinic(project = "TCGA-LGG", type = "Clinical")
    
    # Bind the results, as the columns might not be the same,
    # we will will plyr rbind.fill, to have all columns from both files
    clinical <- plyr::rbind.fill(gbm_clin,lgg_clin)
    
    
    # Fetch clinical data directly from the clinical XML files.
    # if barcode is not set, it will consider all samples.
    # We only set it to make the example faster
    query <- GDCquery(project = "TCGA-GBM",
                      file.type = "xml",
                      data.category = "Clinical",
                      barcode = c("TCGA-08-0516","TCGA-02-0317")) 
    GDCdownload(query)
    clinical <- GDCprepare_clinic(query, clinical.info = "patient")
    clinical.drug <- GDCprepare_clinic(query, clinical.info = "drug")
    clinical.radiation <- GDCprepare_clinic(query, clinical.info = "radiation")
    clinical.admin <- GDCprepare_clinic(query, clinical.info = "admin")
    

    R包TCGAbiolinks下载突变数据

    mutation <– GDCquery_Maf(tumor = "ACC", pipelines = "mutect2")
    

    访问从TCGA论文中检索到的亚型信息

    gbm.subtypes <– TCGAquery_subtype(tumor = "gbm")
    brca.subtypes <– TCGAquery_subtype(tumor = "brca")
    

    补充介绍SummarizedExperiment S4对象,可以使用三个不同的accessor访问数据,assay()获取表达矩阵,rowRanges()获取基因信息,colData()获取样本信息。

    # R object --- SummarizedExperiment
    library(TCGAWorkflowData)
    library(SummarizedExperiment)
    # Load object from TCGAWorkflowData package
    # This object will be created in the further sections,
    data(GBMIllumina_HiSeq) 
    # get expression matrix
    data <- assay(gbm.exp)
    datatable(data[1:10,], 
              options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), 
              rownames = TRUE)
    # get genes information
    genes.info <- rowRanges(gbm.exp)
    genes.info        
    # get sample information
    sample.info <- colData(gbm.exp)
    

    1.2 Downloading data from Broad TCGA GDAC

    R包RTCGAToolbox下载GDAC数据

    library(RTCGAToolbox)
    # Get the last run dates
    lastRunDate <- getFirehoseRunningDates()[1]
    lastAnalyseDate <- getFirehoseAnalyzeDates()[1]
    # get DNA methylation data, RNAseq2 and clinical data for LGG
    lgg.data <- getFirehoseData(dataset = "LGG",
                                gistic2_Date = getFirehoseAnalyzeDates(1), runDate = lastRunDate,
                                Methylation = FALSE, RNAseq2_Gene_Norm = FALSE, Clinic = TRUE, Mutation = TRUE,
                                fileSizeLimit = 10000)
    
    # get DNA methylation data, RNAseq2 and clinical data for GBM
    gbm.data <- getFirehoseData(dataset = "GBM",
                                runDate = lastDate, gistic2_Date = getFirehoseAnalyzeDates(1),
                                Methylation = FALSE, Clinic = TRUE, RNAseq2_Gene_Norm = TRUE, Mutation = TRUE,
                                fileSizeLimit = 10000)
    
    # To access the data you should use the getData function or simply access with @ (for example gbm.data@Clinical)
    lgg.mut <- getData(lgg.data, "Mutation")
    lgg.clin <- getData(lgg.data, "clinical")
    
    # Download GISTIC results
    lastanalyzedate <- getFirehoseAnalyzeDates(1)
    gistic <- getFirehoseData("GBM", gistic2Date = lastanalyzedate, clinical = FALSE, GISTIC = TRUE)
    
    # get GISTIC results
    gistic.allbygene <- getData(gistic, type = "GISTIC", platform = "AllByGene")
    gistic.thresholedbygene <- getData(gistic, type = "GISTIC", platform = "ThresholdedByGene")
    

    2. Genomic analysis

    拷贝数变异(CNVs)在癌症的发生和发展中起着重要的作用。作为基因组重排(如缺失,重复,插入,倒位和易位)的结果,染色体片段可以发生缺失或扩增。CNVs是大于1 kb的基因组区域,在两种情况下拷贝数发生变化(例如,肿瘤与正常情况)。
    TCGA收集拷贝数数据,并允许对癌症进行CNV分析。利用微阵列和基于测序的技术,对肿瘤和配对正常DNA样本进行分析,探测拷贝数变异。作者将展示如何分析来自TCGA的CNV level 3数据,以确定癌症基因组的反复变异,分析了来自SNP array (Affymetrix Genome-Wide Human SNP Array 6.0) 的GBM和LGG segmented CNV 。

    2.1 Pre-Processing Data

    library(TCGAbiolinks)
    query.lgg.nocnv <- GDCquery(project = "TCGA-LGG",
                                data.category = "Copy number variation",
                                legacy = TRUE,
                                file.type = "nocnv_hg19.seg",
                                sample.type = c("Primary solid Tumor"))
    GDCdownload(query.lgg.nocnv, files.per.chunk = 100)
    lgg.nocnv <- GDCprepare(query.lgg.nocnv, save = TRUE, save.filename = "LGGnocnvhg19.rda")
    
    query.gbm.nocnv <- GDCquery(project = "TCGA-GBM",
                                data.category = "Copy number variation",
                                legacy = TRUE,
                                file.type = "nocnv_hg19.seg",
                                sample.type = c("Primary solid Tumor"))
    GDCdownload(query.gbm.nocnv, files.per.chunk = 100)
    gbm.nocnv <- GDCprepare(query.gbm.nocnv, save = TRUE, save.filename = "GBMnocnvhg19.rda")
    

    2.2 Identification of recurrent CNV in cancer

    在许多分析的基因组中都存在与癌症相关的拷贝数变异。R包GAIA用于识别频发拷贝数变异。

    library(TCGAbiolinks)
    library(downloader)
    library(readr)
    library(gaia)
    
    for(cancer in c("LGG","GBM")){
        message(paste0("Starting", cancer))
        # Prepare CNV matrix
        cnvMatrix <- get(load(paste0(cancer, "nocnvhg19.rda"))) 
        
        # Add label (0 for loss, 1 for gain)
        cnvMatrix <- cbind(cnvMatrix, Label = NA)
        cnvMatrix[cnvMatrix[, "Segment_Mean"] < -0.3, "Label"] <- 0
        cnvMatrix[cnvMatrix[, "Segment_Mean"] > 0.3, "Label"] <- 1
        cnvMatrix <- cnvMatrix [!is.na(cnvMatrix$Label),]
    
        # Remove "Segment_Mean" and change col.names
        cnvMatrix <- cnvMatrix[,-6]
        colnames(cnvMatrix)<- c("Sample.Name", "Chromosome","Start","End","Num.of.Markers", "Aberration")
    
        # Substitute Chromosomes "X" and "Y" with "23" and "24"
        xidx <- which(cnvMatrix$Chromosome=="X")
        yidx <- which(cnvMatrix$Chromosome=="Y")
        cnvMatrix[xidx,"Chromosome"] <- 23
        cnvMatrix[yidx,"Chromosome"] <- 24
        cnvMatrix$Chromosome <- sapply(cnvMatrix$Chromosome,as.integer)
    
        # Recurrent CNV identification with GAIA
    
        # Retrieve probes meta file from broadinstitute website
        # Recurrent CNV identification with GAIA
        gdac.root <- "ftp://ftp.broadinstitute.org/pub/GISTIC2.0/hg19_support/"
        file <- paste0(gdac.root, "genome.info.6.0_hg19.na31_minus_frequent_nan_probes_sorted_2.1.txt")
    
        # Retrieve probes meta file from broadinstitute website
        if(!file.exists(basename(file))) downloader::download(file, basename(file))  
        markersMatrix <- readr::read_tsv(basename(file), col_names = FALSE, col_types = "ccn", progress = TRUE)
        colnames(markersMatrix) <- c("Probe.Name", "Chromosome", "Start")
        unique(markersMatrix$Chromosome)
        xidx <- which(markersMatrix$Chromosome=="X")
        yidx <- which(markersMatrix$Chromosome=="Y")
        markersMatrix[xidx,"Chromosome"] <- 23
        markersMatrix[yidx,"Chromosome"] <- 24
        markersMatrix$Chromosome <- sapply(markersMatrix$Chromosome,as.integer)
        markerID <- apply(markersMatrix,1,function(x) paste0(x[2],":",x[3]))
        print(table(duplicated(markerID)))
        ## FALSE TRUE
        ## 1831041 186
        # There are 186 duplicated markers
        print(table(duplicated(markersMatrix$Probe.Name)))
        ## FALSE
        ## 1831227
        # ... with different names!
        # Removed duplicates
        markersMatrix <- markersMatrix[-which(duplicated(markerID)),]  
        # Filter markersMatrix for common CNV
        markerID <- apply(markersMatrix,1,function(x) paste0(x[2],":",x[3]))
    
        file <- paste0(gdac.root, "CNV.hg19.bypos.111213.txt")
        if(!file.exists(basename(file))) download(file, basename(file))
        commonCNV <- readr::read_tsv(basename(file), progress = TRUE)
        commonID <- apply(commonCNV,1,function(x) paste0(x[2],":",x[3]))
        print(table(commonID %in% markerID))
        print(table(markerID %in% commonID))
        markersMatrix_fil <- markersMatrix[!markerID %in% commonID,]
    
        markers_obj <- load_markers(as.data.frame(markersMatrix_fil))
        nbsamples <- length(get(paste0("query.",tolower(cancer),".nocnv"))$results[[1]]$cases)
        cnv_obj <- load_cnv(cnvMatrix, markers_obj, nbsamples)
        results <- runGAIA(cnv_obj,
                           markers_obj,
                           output_file_name = paste0("GAIA_",cancer,"_flt.txt"),
                           aberrations = -1,  # -1 to all aberrations
                           chromosomes = -1, # -1 to all chromosomes
                           num_iterations = 10,
                           threshold = 0.25)
    
        # Set q–value threshold
        threshold <- 0.0001
    
        # Plot the results
        RecCNV <- t(apply(results,1,as.numeric))
        colnames(RecCNV) <- colnames(results)
        RecCNV <- cbind(RecCNV, score=0)
        minval <- format(min(RecCNV[RecCNV[,"q-value"]!=0,"q-value"]),scientific=FALSE)
        minval <- substring(minval,1, nchar(minval)-1)
        RecCNV[RecCNV[,"q-value"]==0,"q-value"] <- as.numeric(minval)
        RecCNV[,"score"] <- sapply(RecCNV[,"q-value"],function(x) -log10(as.numeric(x)))
        RecCNV[RecCNV[,"q-value"]==as.numeric(minval),]
    
        gaiaCNVplot(RecCNV, threshold)
        save(results, RecCNV, threshold, file = paste0(cancer,"_CNV_results.rda"))
        message(paste0("Results saved as:", cancer,"_CNV_results.rda"))
    }
    

    R包TCGAbiolinks的函数gaiaCNVplot()用于可视化,为了节省时间我只跑了部分样本,得到这样的示意图

    经鉴定拷贝数显著改变的基因组区域 (corrected p-value < 10–4) 随后被注释,以报告可能与癌症相关的扩增和缺失基因。

    2.3 Identification of recurrent CNV in cancer

    library(biomaRt)
    library(GenomicRanges)
    # Get gene information from GENCODE using biomart
    genes <- TCGAbiolinks:::get.GRCh.bioMart(genome = "hg19") 
    # mart <– useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
    # genes <– getBM(attributes = c("hgnc_symbol", "chromosome_name","start_position","end_position"), mart=mart)
    genes <- genes[genes$external_gene_name != "" & genes$chromosome_name %in% c(1:22,"X","Y"),]
    genes[genes$chromosome_name == "X", "chromosome_name"] <- 23
    genes[genes$chromosome_name == "Y", "chromosome_name"] <- 24
    genes$chromosome_name <- sapply(genes$chromosome_name,as.integer)
    genes <- genes[order(genes$start_position),]
    genes <- genes[order(genes$chromosome_name),]
    genes <- genes[,c("external_gene_name", "chromosome_name", "start_position","end_position")]
    colnames(genes) <- c("GeneSymbol","Chr","Start","End")
    genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)
    save(genes_GR,genes,file = "genes_GR.rda", compress = "xz")
    
    # Get gene information from GENCODE using biomart
    data(genes_GR) # downloaded in the previous step (available in TCGAWorkflowData)
    cancer = "LGG"
    load(paste0(cancer,"_CNV_results.rda"))
    sCNV <- RecCNV[RecCNV[,"q-value"] <= threshold,c(1:4,6)]
    sCNV <- sCNV[order(sCNV[,3]),]
    sCNV <- sCNV[order(sCNV[,1]),]
    colnames(sCNV) <- c("Chr","Aberration","Start","End","q-value")
    sCNV_GR <- makeGRangesFromDataFrame(sCNV,keep.extra.columns = TRUE)
    
    hits <- findOverlaps(genes_GR, sCNV_GR, type = "within")
    sCNV_ann <- cbind(sCNV[subjectHits(hits),],genes[queryHits(hits),])
    AberrantRegion <- paste0(sCNV_ann[,1],":",sCNV_ann[,3],"-",sCNV_ann[,4])
    GeneRegion <- paste0(sCNV_ann[,7],":",sCNV_ann[,8],"-",sCNV_ann[,9])
    AmpDel_genes <- cbind(sCNV_ann[,c(6,2,5)],AberrantRegion,GeneRegion)
    AmpDel_genes[AmpDel_genes[,2] == 0,2] <- "Del"
    AmpDel_genes[AmpDel_genes[,2] == 1,2] <- "Amp"
    rownames(AmpDel_genes) <- NULL
    
    save(RecCNV, AmpDel_genes, file = paste0(cancer,"_CNV_results.rda"))
    

    2.4 Visualizing multiple genomic alteration events

    R包circlize进行Circos圈图的绘制

    LGGmut <- GDCquery_Maf(tumor = "LGG", pipelines = "mutect2")
    # Retrieve curated mutations for selected cancer (e.g. "LGG") 
    data(mafMutect2LGGGBM)
    # Select only potentially damaging mutations
    LGGmut <- LGGmut[LGGmut$Variant_Classification %in% c("Missense_Mutation",
                                                          "Nonsense_Mutation",
                                                          "Nonstop_Mutation",
                                                          "Frame_Shift_Del",
                                                          "Frame_Shift_Ins"),]
    # Select recurrent mutations (identified in at least two samples)
    mut.id <- paste0(LGGmut$Chromosome,":",LGGmut$Start_Position,"-",LGGmut$End_Position,"|",LGGmut$Reference_Allele)
    mut <- cbind(mut.id, LGGmut)
    # Prepare selected mutations data for circos plot
    s.mut <- mut[mut$mut.id %in% unique(mut.id[duplicated(mut.id)]),]
    s.mut <- s.mut[,c("Chromosome","Start_Position","End_Position","Variant_Classification","Hugo_Symbol")]
    s.mut <- unique(s.mut)
    typeNames <- unique(s.mut[,4])
    type <- c(4:1)
    names(type) <- typeNames[1:4]
    Type <- type[s.mut[,4]]
    s.mut <- cbind(s.mut,Type)
    s.mut <- s.mut[,c(1:3,6,4,5)]
    
    # Load recurrent CNV data for selected cancer (e.g. "LGG")
    load("LGG_CNV_results.rda")
    # Prepare selected sample CNV data for circos plot
    s.cnv <- as.data.frame(RecCNV[RecCNV[,"q-value"] <= 0.0001,c(1:4,6)])
    s.cnv <- s.cnv[,c(1,3,4,2)]
    s.cnv[s.cnv$Chromosome == 23,"Chromosome"] <- "X"
    s.cnv[s.cnv$Chromosome == 24,"Chromosome"] <- "Y"
    Chromosome <- paste0("chr",s.cnv[,1])
    s.cnv <- cbind(Chromosome, s.cnv[,-1])
    s.cnv[,1] <- as.character(s.cnv[,1])
    s.cnv[,4] <- as.character(s.cnv[,4])
    s.cnv <- cbind(s.cnv,CNV=1)
    colnames(s.cnv) <- c("Chromosome","Start_position","End_position","Aberration_Kind","CNV")
    
    library(circlize)
    # Draw genomic circos plot
    par(mar=c(1,1,1,1), cex=1)
    circos.initializeWithIdeogram()
    # Add CNV results
    colors <- c("forestgreen","firebrick")
    names(colors)  <- c(0,1)
    circos.genomicTrackPlotRegion(s.cnv,  ylim = c(0,1.2),
                                  panel.fun = function(region, value, ...) {
                                    circos.genomicRect(region, value, ytop.column = 2, ybottom = 0,
                                                       col = colors[value[[1]]], 
                                                       border="white")
                                    cell.xlim = get.cell.meta.data("cell.xlim")
                                    circos.lines(cell.xlim, c(0, 0), lty = 2, col = "#00000040")
                                  })
    # Add mutation results
    colors <- c("blue","green","red","gold")
    names(colors)  <- typeNames[1:4]
    circos.genomicTrackPlotRegion(s.mut, ylim = c(1.2,4.2),
                                  panel.fun = function(region, value, ...) {
                                    circos.genomicPoints(region, value, cex = 0.8, pch = 16, col = colors[value[[2]]], ...)
                                  })
    
    circos.clear()
    
    legend(-0.2, 0.2, bty="n", y.intersp=1, c("Amp","Del"), pch=15, 
           col=c("firebrick","forestgreen"), title="CNVs", text.font=1, cex=0.4, title.adj=0)
    legend(-0.2, 0, bty="n", y.intersp=1, names(colors), pch=16, 
           col=colors, title="Mutations", text.font=1, cex=0.4, title.adj=0)
    
    
    par(mar=c(1,1,1,1),cex=1.5)
    circos.par("start.degree" = 90, canvas.xlim = c(0, 1), canvas.ylim = c(0, 1), 
               gap.degree = 270, cell.padding = c(0, 0, 0, 0), track.margin = c(0.005, 0.005))
    circos.initializeWithIdeogram(chromosome.index = "chr17")
    circos.par(cell.padding = c(0, 0, 0, 0))
    # Add CNV results
    colors <- c("forestgreen","firebrick")
    names(colors)  <- c(0,1)
    circos.genomicTrackPlotRegion(s.cnv,  ylim = c(0,1.2),
                                  panel.fun = function(region, value, ...) {
                                    circos.genomicRect(region, value, ytop.column = 2, ybottom = 0,
                                                       col = colors[value[[1]]], 
                                                       border="white")
                                    cell.xlim = get.cell.meta.data("cell.xlim")
                                    circos.lines(cell.xlim, c(0, 0), lty = 2, col = "#00000040")
                                  })
    
    # Add mutation results representing single genes
    genes.mut <- paste0(s.mut$Hugo_Symbol,"-",s.mut$Type)
    s.mutt <- cbind(s.mut,genes.mut)
    n.mut <- table(genes.mut)
    idx <- !duplicated(s.mutt$genes.mut)
    s.mutt <- s.mutt[idx,]
    s.mutt <- cbind(s.mutt,num=n.mut[s.mutt$genes.mut])
    genes.num <- paste0(s.mutt$Hugo_Symbol," (",s.mutt$num.Freq,")")
    s.mutt <- cbind(s.mutt[,-c(6:8)],genes.num)
    s.mutt[,6] <- as.character(s.mutt[,6])
    s.mutt[,4] <- s.mutt[,4]/2
    s.mutt$num.Freq <- NULL
    colors <- c("blue","green","red","gold")
    names(colors)  <- typeNames[1:4]
    circos.genomicTrackPlotRegion(s.mutt, ylim = c(0.3,2.2), track.height = 0.05,
                                  panel.fun = function(region, value, ...) {
                                    circos.genomicPoints(region, value, cex = 0.4, pch = 16, col = colors[value[[2]]], ...)
                                  })
    
    circos.genomicTrackPlotRegion(s.mutt, ylim = c(0, 1), track.height = 0.1, bg.border = NA)
    i_track = get.cell.meta.data("track.index")
    
    circos.genomicTrackPlotRegion(s.mutt, ylim = c(0,1),
                                  panel.fun = function(region, value, ...) {
                                    circos.genomicText(region, value, 
                                                       y = 1, 
                                                       labels.column = 3,
                                                       col = colors[value[[2]]],
                                                       facing = "clockwise", adj = c(1, 0.5),
                                                       posTransform = posTransform.text, cex = 0.4, niceFacing = TRUE)
                                  }, track.height = 0.1, bg.border = NA)
    
    circos.genomicPosTransformLines(s.mutt,
                                    posTransform = function(region, value)
                                      posTransform.text(region, 
                                                        y = 1, 
                                                        labels = value[[3]],
                                                        cex = 0.4, track.index = i_track+1),
                                    direction = "inside", track.index = i_track)
    
    circos.clear()
    
    legend(0.25, 0.2, bty="n", y.intersp=1, c("Amp","Del"), pch=15, 
           col=c("firebrick","forestgreen"), title="CNVs", text.font=1, cex=0.4, title.adj=0)
    legend(0, 0.2, bty="n", y.intersp=1, names(colors), pch=16, 
           col=colors, title="Mutations", text.font=1, cex=0.4, title.adj=0)
    

    未完待续……

    相关文章

      网友评论

        本文标题:Bioconductor --- TCGA Workflow

        本文链接:https://www.haomeiwen.com/subject/ouurshtx.html