美文网首页TCGA
突变数据的处理及柱状图的使用

突变数据的处理及柱状图的使用

作者: 碌碌无为的杰少 | 来源:发表于2021-01-19 15:31 被阅读0次

    突变数据其实做的不多,代码什么的也较少,其实也比较简单,现在就整理了一下,然后得到了柱形图的代码,一并奉上。

    下载数据

    options(stringsAsFactors = F) 
    require(maftools) 
    require(dplyr)
    COAD = read.maf(maf = 'TCGA.COAD.mutect.03652df4-6090-4f5a-a2ff-ee28a37f9301.DR-10.0.somatic.maf.gz')
    info_merge=data.frame(id=COAD@clinical.data$Tumor_Sample_Barcode,
                          sample=substring(COAD@clinical.data$Tumor_Sample_Barcode,1,15))
    
    

    自带的包

    maf_df = COAD@data
    plotmafSummary(maf = COAD, rmOutlier = TRUE,
                   #showBarcodes = FALSE,
                   addStat = 'median', dashboard = TRUE, titvRaw = FALSE)
     COAD@variants.per.sample
    maf_df %>% filter(Hugo_Symbol=="EGFR") %>%
                        count(Variant_Classification,sort = T)
    count(maf_df,Hugo_Symbol,sort = T)[1:30]
    

    ·#分组处理

    dd1 <- Last1[,c('CIC','sampleID')]
    colnames(dd1)[2] <- 'sample'
    info_merge1 <- inner_join(info_merge,dd1,by='sample')
    
    
    
    A.name=as.character(subset(info_merge1,CIC=="A")$id)
    B.name=as.character(subset(info_merge1,CIC=="B")$id)
    C.name=as.character(subset(info_merge1,CIC=="C")$id)
    D.name=as.character(subset(info_merge1,CIC=="D")$id)
    
    A=subsetMaf(maf = COAD,tsb=A.name,isTCGA = F)
    B=subsetMaf(maf = COAD,tsb=B.name,isTCGA = F)
    C=subsetMaf(maf = COAD,tsb=C.name,isTCGA = F)
    D=subsetMaf(maf = COAD,tsb=D.name,isTCGA = F)
    length(unique(A@data$Tumor_Sample_Barcode))
    length(unique(B@data$Tumor_Sample_Barcode))
    length(unique(C@data$Tumor_Sample_Barcode))
    length(unique(D@data$Tumor_Sample_Barcode))
    
    
    col = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
    names(col) = c('Frame_Shift_Del','Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins',
                   'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del')
    oncoplot(maf = COAD ,writeMatrix = T,top=30, colors = col,fontSize = 1)
    COAD1 = read.table("onco_matrix.txt", 
                      header = TRUE, 
                      stringsAsFactors = FALSE, sep = "\t")
    
    oncoplot(maf = A , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
    oncoplot(maf = B , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
    oncoplot(maf = C , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
    oncoplot(maf = D , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
    
    图片.png

    柱形图和TMB

    tmp <- Last1$MSI.Status
    tmp <- Last1[,c('CIC','MSI.Status','Stage')]
    df <- data.frame()
    col21 = c("#E11E24","#92D0E6","#F5949B","#FBB96F","#007AB8","#A2D184","#00A04E",
              "#BC5627","#0080BD","#EBC379","#A74D9D","#F57E21","#00998F","#F4E708",
              "#F184B3","#9B9A99","#6CC7C0","#BCB9DA","#F37C75","#5FB0D4","#F9AD62")
    
    for(i in unique(tmp$CIC)){
    df_i <- subset(tmp, tmp$CIC==i) %>% pull(MSI.Status) %>% table()
                        df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                        names(df_i) <- c("CIC", "MSI.Status", "value")
                        df <- rbind(df, df_i)
                         }
    
    p1 <- ggplot(df, aes(x=CIC, y=value, fill=MSI.Status)) + 
                        geom_bar(stat= "identity", position = "fill") + 
                        scale_fill_manual(values = col21 ) +
                        labs(x = 'MSI.Status', y = 'Relative Abundance', title = 'Samples composition') +
                        theme(panel.grid = element_blank(), panel.background = element_blank(), 
                              axis.text.x = element_text(angle=45))
    p1
    pdf("p1.pdf",width = 6,height = 7)
    print(p1)
    dev.off()
    
    tmp <- Last1$MSI.Status
    
    tmp <- Last1[,c('CIC','MSI.Status','Stage')]
    df <- data.frame()
    for(i in unique(tmp$CIC)){
                        df_i <- subset(tmp, tmp$CIC==i) %>% pull(Stage) %>% table()
                        df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                        names(df_i) <- c("CIC", "Stage", "value")
                        df <- rbind(df, df_i)
    }
    
    
    p2 <- ggplot(df, aes(x=CIC, y=value, fill=Stage)) + 
                        geom_bar(stat= "identity", position = "fill") + 
                        scale_fill_manual(values = col21 ) +
                        labs(x = 'Stage', y = 'Relative Abundance', title = 'Samples composition') +
                        theme(panel.grid = element_blank(), panel.background = element_blank(), 
                              axis.text.x = element_text(angle=45))
    p2
    pdf("p2.pdf",width = 6,height = 7)
    print(p2)
    dev.off()
    
    DD2 <- COAD@variants.per.sample
    DD2$Variants <- DD2$Variants/35
    colnames(DD2)[1] <- 'sample'
    DD2$sample <- substring(DD2$sample,1,15)
    info_merge2$group <- ifelse(info_merge2$TMB> 10,'high','low')
    tmp <- info_merge2[,c('CIC','group')]
    df <- data.frame()
    
    for(i in unique(tmp$CIC)){
                        df_i <- subset(tmp, tmp$CIC==i) %>% pull(group) %>% table()
                        df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                        names(df_i) <- c("CIC", "group", "value")
                        df <- rbind(df, df_i)
    }
    
    p3 <- ggplot(df, aes(x=CIC, y=value, fill=group)) + 
                        geom_bar(stat= "identity", position = "fill") + 
                        scale_fill_manual(values = c('#E11E24',"#92D0E6") ) +
                        labs(x = 'MSI.Status', y = 'Relative Abundance', title = 'Samples composition') +
                        theme(panel.grid = element_blank(), panel.background = element_blank(), 
                              axis.text.x = element_text(angle=45))
    
    p3
    pdf("p3.pdf",width = 6,height = 7)
    print(p3)
    dev.off()
    
    图片.png
    图片.png

    boxplot

    my_comparisons <- list(
                        c("D", "B"), c("D", "C"),
                        c("D", "A")
    )
    
    # Create the box plot. Change colors by groups: Species
    # Add jitter points and change the shape by groups
    ggboxplot(
                        info_merge2, x = "CIC", y = "TMB",
                        color = "CIC", palette = 'nejm',
                        add = "jitter"
    )+
                        #scale_y_continuous(limits=c(0, 90), breaks=c(0, 30, 60, 90))+
                        stat_compare_means(comparisons = my_comparisons, method = "t.test")
    
    
    
    图片.png

    相关文章

      网友评论

        本文标题:突变数据的处理及柱状图的使用

        本文链接:https://www.haomeiwen.com/subject/icoqzktx.html