单细胞的GO KEGG GSEA GSVA分析

作者: 碌碌无为的杰少 | 来源:发表于2021-01-31 16:32 被阅读0次

    今天抽个空把单细胞领域的GO KEGG GSEA GSVA都做了一遍,跑完以后的感觉就是有这样的理解,就是单细胞我首先会选择做GSVA,但是我还会顺便做下kegg或者GSEA,因为我想看一下差异通路到底有哪些基因富集,从头到脚跑一遍后,可能对通路的富集分析有了更深的理解。

    数据

    load('singleBB.Rdata')
    view(experiment.aggregate@meta.data)
    experiment.aggregate@meta.data$celltype4 = "NA"
    # 更改 celltype 信息,设置细胞群新名称
    experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$celltype3 %in% c('GC B cells','Memory B cells','Naive B cells')), "celltype4"] = "CD20+ B cells"
    experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$celltype3 %in% c('IGA+ Plasma','IGG+ Plasma')), "celltype4"] = "Plasma B cells"
    cells1 <- subset(experiment.aggregate@meta.data, celltype4 %in% c("CD20+ B cells"))  %>% rownames()
    cells2 <- subset(experiment.aggregate@meta.data, celltype4 %in%  c("Plasma B cells"))  %>% rownames()
    deg <- FindMarkers(experiment.aggregate, ident.1 = cells1, ident.2 = cells2)
    deg <- data.frame(gene = rownames(deg), deg)
    deg1 <- deg
    logFC_t=0.5
    P.Value_t = 0.05
    k1 = (deg1$p_val_adj < P.Value_t)&(deg1$avg_logFC < -logFC_t)
    k2 = (deg1$p_val_adj < P.Value_t)&(deg1$avg_logFC > logFC_t)
    table(k1)
    table(k2)
    change = ifelse(k1,"down",ifelse(k2,"up","stable"))
    deg1$change <- change
    library(ggplot2)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(dplyr)
    s2e <- bitr(deg1$gene, 
                fromType = "SYMBOL",
                toType = "ENTREZID",
                OrgDb = org.Hs.eg.db)#人类 转换成ENTREZID
    #其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
    deg1 <- inner_join(deg1,s2e,by=c("gene"="SYMBOL"))
    
    #source("kegg_plot_function.R")
    #source表示运行整个kegg_plot_function.R脚本,里面是一个function
    #以up_kegg和down_kegg为输入数据做图
    
    #1.GO database analysis ----
    
    #(1)输入数据
    gene_up = deg1[deg1$change == 'up','ENTREZID'] 
    gene_down = deg1[deg1$change == 'down','ENTREZID'] 
    gene_diff = c(gene_up,gene_down)
    gene_all = deg1[,'ENTREZID']
    

    GO分析

    if(T){
      #细胞组分
      ego_CC <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "CC",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #生物过程
      ego_BP <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "BP",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #分子功能:
      ego_MF <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "MF",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      save(ego_CC,ego_BP,ego_MF,file = "GO.Rdata")
    }
    if(T){
      #细胞组分
      ego_CC <- enrichGO(gene = gene_down,
                         OrgDb= org.Hs.eg.db,
                         ont = "CC",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #生物过程
      ego_BP <- enrichGO(gene = gene_down,
                         OrgDb= org.Hs.eg.db,
                         ont = "BP",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #分子功能:
      ego_MF <- enrichGO(gene = gene_down,
                         OrgDb= org.Hs.eg.db,
                         ont = "MF",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      save(ego_CC,ego_BP,ego_MF,file = "GODOWN.Rdata")
    }
    load('GODOWN.Rdata')
    ggg <-  ego_MF@result
    #(3)可视化
    #条带图
    dotplot(ego_MF,showCategory=13)
    barplot(ego_BP)
    if(F){
      go <- enrichGO(gene = gene_up, OrgDb = "org.Hs.eg.db", ont="all")
    }
    ##save(go,file="go.Rdata")
    ### 直接加载我保存的数据
    
    library(ggplot2)
    p <- dotplot(go, split="ONTOLOGY") +facet_grid(ONTOLOGY~., scale="free")
    p
    
    图片.png

    KEGG

    if(T){
      kk.up <- enrichKEGG(gene         = gene_up,
                          organism     = 'hsa',
                          universe     = gene_all,
                          pvalueCutoff = 0.9,
                          qvalueCutoff = 0.9)
      kk.down <- enrichKEGG(gene         =  gene_down,
                            organism     = 'hsa',
                            universe     = gene_all,
                            pvalueCutoff = 0.9,
                            qvalueCutoff =0.9)
      kk.diff <- enrichKEGG(gene         = gene_diff,
                            organism     = 'hsa',
                            pvalueCutoff = 0.9)
      save(kk.diff,kk.down,kk.up,file = "GSE4107kegg.Rdata")
    }
    load("GSE4107kegg.Rdata")
    #(3)从富集结果中提取出结果数据框
    ekegg <- setReadable(kk.up, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
    kegg_diff_dt <- data.frame(ekegg)
    p1 <- barplot(ekegg, showCategory=10)
    p2 <- dotplot(ekegg, showCategory=10)
    plotc = p1/p2
    
    down_kegg <- kk.down@result %>%
      filter(pvalue<0.05) %>% #筛选行
      mutate(group=-1) #新增列
    
    up_kegg <- kk.up@result %>%
      filter(pvalue<0.01) %>%
      mutate(group=1)
    #(5)可视化
    g_kegg <- kegg_plot(up_kegg,down_kegg)
    g_kegg 
    g_kegg +scale_y_continuous(labels = c(15,10,5,0,5,10,15,20,25,30))
    
    图片.png

    GSEA

    library(GSEABase) 
    library(clusterProfiler)
    library(DOSE)
    library(org.Hs.eg.db)
    library(ggplot2)
    library(stringr)
    options(stringsAsFactors = F)
    
    data(geneList)
    geneList = deg1$avg_logFC
    names(geneList) = deg1$ENTREZID
    geneList = sort(geneList,decreasing = T)
    
    #准备gmt文件
    geneset <- read.gmt("c2.cp.kegg.v7.2.entrez.gmt")  
    geneset$ont = str_remove(geneset$ont,"HALLMARK_")
    egmt <- GSEA(geneList, TERM2GENE=geneset,verbose=F)
    #> Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize = minGSSize, : There are ties in the preranked stats (0.05% of the list).
    #> The order of those tied genes will be arbitrary, which may produce unexpected results.
    egmt2<- setReadable(egmt,OrgDb=org.Hs.eg.db, keyType = "ENTREZID")
    y=data.frame(egmt2)
    #气泡图,展示geneset被激活还是抑制
    dotplot(egmt2,split=".sign")+facet_grid(~.sign)
    #dotplot(egmt2,split=".sign")+facet_grid(~.sign)+
    #scale_y_discrete(labels=function(x) str_wrap(str_replace_all(x,"_"," "), width=30))
    #  scale_y_discrete(labels=function(x) str_remove(x,"HALLMARK_"))
    #dotplot(y,showCategory=20,split=".sign")+facet_grid(~.sign)
    
    library(enrichplot)
    gseaplot2(egmt, geneSetID = 1, title = egmt$Description[1])
    #多个gnenset合并展示
    gseaplot2(egmt, geneSetID = 1:3,pvalue_table = T)
    #gseaplot2(egmt,2,color = "blue",pvalue_table = T)
    gseaplot2(egmt2, geneSetID = 'KEGG_PROTEIN_EXPORT',pvalue_table = T)
    
    
    
    
    
    for(i in seq_along(egmt@result$ID)){
      p <- gseaplot(egmt, geneSetID = i, title = egmt@result$ID[i], by = "runningScore")
      filename <- paste0('i love you', egmt@result$ID[i], '.png')
      ggsave(filename = filename, p, width = 8, height = 4)
    }
    # 美化版
    if(F){
      library(enrichplot)
      for(i in seq_along(egmt@result$ID)){
        p <- gseaplot2(egmt, geneSetID = i, title = egmt@result$ID[i])
        filename <- paste0('i love you', egmt@result$ID[i], '.png')
        ggsave(filename = filename, p, width = 8, height = 5)
      }
    }
    
    图片.png

    GSVA

    我用的数据集是board公司http://www.gsea-msigdb.org/gsea/msigdb的kegg和reactome,主要是reactome有代谢通路,GSVA可以设置多核parallel.sz=12

    library(ggplot2)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(dplyr)
    setwd("~/project/")
    load('singleBB.Rdata')
    expr <- as.data.frame(experiment.aggregate@assays$RNA@data)
    expr$ID <- rownames(expr)
    s2e <- bitr(expr$ID, 
                fromType = "SYMBOL",
                toType = "ENTREZID",
                OrgDb = org.Hs.eg.db)#人类 转换成ENTREZID
    #其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
    expr <- inner_join(expr,s2e,by=c("ID"="SYMBOL"))
    rownames(expr) <- expr$ENTREZID
    expr <- expr[,-9150]
    expr <- expr[,-9149]
    meta <- as.data.frame(experiment.aggregate@meta.data[,c('orig.ident',"celltype3")])
    
    kegggmt1 <- read.gmt("c2.cp.kegg.v7.2.entrez.gmt")
    kegggmt2 <- read.gmt("c2.cp.reactome.v7.2.entrez.gmt")
    kegggmt <- rbind(kegggmt1,kegggmt2)
    colnames(kegggmt1)
    kegg_list = split(kegggmt$gene, kegggmt$term)
    library(GSVA)
    expr=as.matrix(expr)
    kegg2 <- gsva(expr, kegg_list, kcdf="Gaussian",method = "gsva",parallel.sz=12)
    write.table(data,"kegg2.txt",sep="\t",quote = F)
    data <- data.table::fread('kegg2.txt',data.table = F)
    rownames(data) <- data$V1
    data <- data[,-1]
    library(dplyr)
    meta <- meta %>%
      arrange(meta$celltype3)
    data <- data[,rownames(meta)]
    identical(colnames(data),rownames(meta))
    data$GC <- apply(data[,1:111], 1, mean)
    data$IGA <- apply(data[,112:4235], 1, mean)
    data$IGG <- apply(data[,4236:5504], 1, mean)
    data$memory <- apply(data[,5505:7797], 1, mean)
    data$naive <- apply(data[,7798:9148], 1, mean)
    table(meta$celltype3)
    test <- data[,9149:9153]
    pathway <- c('KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION','REACTOME_COSTIMULATION_BY_THE_CD28_FAMILY','KEGG_PROTEIN_EXPORT','KEGG_DNA_REPLICATION','KEGG_BASE_EXCISION_REPAIR','KEGG_CELL_CYCLE',
                 'REACTOME_GLUCOSE_METABOLISM','REACTOME_FATTY_ACID_METABOLISM','REACTOME_METABOLISM_OF_FAT_SOLUBLE_VITAMINS','REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES','REACTOME_SELENOAMINO_ACID_METABOLIS',
                 'KEGG_JAK_STAT_SIGNALING_PATHWAY','KEGG_TGF_BETA_SIGNALING_PATHWAY','KEGG_WNT_SIGNALING_PATHWAY','KEGG_MTOR_SIGNALING_PATHWAY')
    
    test1 <- test[pathway,c(5,4,1,2,3)]
    result3<- t(scale(t(test1)))
    result3[result3>2]=2
    result3[result3<-2]=-2
    library(pheatmap)
    G1 <- pheatmap(result3,
                    cluster_rows = F,
                    cluster_cols = F,
                    show_rownames = T,
                    show_colnames = T,
                    color =colorRampPalette(c("blue", "white","red"))(100),
                    cellwidth = 10, cellheight = 15,
                    fontsize = 10)
    pdf(("G1.pdf"),width = 7,height = 7)
    print(G1)
    dev.off()
    #test4 <- test3[grep(pattern="METABOLISM",rownames(test3)),]
    
    图片.png

    相关文章

      网友评论

        本文标题:单细胞的GO KEGG GSEA GSVA分析

        本文链接:https://www.haomeiwen.com/subject/dsoltltx.html