美文网首页
GSEA&GSVA分析-单细胞转录组高阶分析03

GSEA&GSVA分析-单细胞转录组高阶分析03

作者: 一车小面包人 | 来源:发表于2023-09-25 16:59 被阅读0次

    背景:分析单细胞转录组中不同细胞类型的特定基因集的差异表达。

    • 1.GSEA分析
      下载并加载会用到的R包:
    library(tidyverse)
    library(data.table)
    library(org.Hs.eg.db)
    library(clusterProfiler)
    library(biomaRt)
    library(enrichplot)
    library(dplyr)
    library(GSEABase)
    library(Seurat)
    library(ggsci)
    

    函数代码:

    #' sc the seurat object
    #' group_name the Idents(sc)
    #' group_1 the FindMarkers ident.1
    #' group_2 the FindMarkers ident.2
    my_GSEA<-function(sc,group_name,group_1,group_2,min.pct=0.25,logfc.threshold = 0.25,thresh.use = 0.99,gmtpath="./geneset_gmt/",gmt=NULL,nrow_all=10,nrow_down=10,nrow_up=10){
    #' 数据预处理
    #sc<-readRDS("./sc_umap.rds")
    dir.create("./GSEA_results")
    sc<-sc
    Idents(sc)<-sc@meta.data[,group_name]
    deg<-FindMarkers(sc,ident.1=group_1,ident.2=group_2,min.pct=min.pct,logfc.threshold = logfc.threshold)
    deg$gene<-rownames(deg)
    sub.deg<-deg[,c("gene","avg_log2FC")]
    genelist_input<-sub.deg
    genename <- as.character(genelist_input[,1])
    gene_map<-bitr(genename,fromType="SYMBOL",toType="ENTREZID",OrgDb = "org.Hs.eg.db")
    colnames(genelist_input)<-c("SYMBOL","avg_log2FC")
    final.gene<-left_join(gene_map,genelist_input,"SYMBOL")
    final.gene<-final.gene%>%arrange(desc(avg_log2FC))
    geneList=final.gene$avg_log2FC
    names(geneList)<-as.character(final.gene$ENTREZID)
    
    #' GSEA分析-GO
    Go_gseresult <- gseGO(geneList, 'org.Hs.eg.db', keyType = "ENTREZID", ont="all", nPerm = 1000, minGSSize = 10, maxGSSize = 1000, pvalueCutoff=1)
    #' GSEA分析-KEGG
    KEGG_gseresult <- gseKEGG(geneList, nPerm = 1000, minGSSize = 10, maxGSSize = 1000, pvalueCutoff=1)
    #' GSEA分析-Reactome
    Go_Reactomeresult <- gsePathway(geneList, nPerm = 1000, minGSSize = 10, maxGSSize = 1000, pvalueCutoff=1)
    #' 保存结果文件 csv
    write.table (Go_gseresult, file ="./GSEA_results/Go_gseresult.csv", sep =",", row.names =TRUE)
    write.table (KEGG_gseresult, file ="./GSEA_results/KEGG_gseresult.csv", sep =",", row.names =TRUE)
    write.table (Go_Reactomeresult, file ="./GSEA_results/Go_Reactomeresult.csv", sep =",", row.names =TRUE)
    
    #' 绘图
    mycol<-pal_nejm()(8)
    pdf("./GSEA_results/Go_gseresult.pdf",height=10,width=12)
    lapply(1:nrow_all, function(i){
      pp<-gseaplot2(Go_gseresult,Go_gseresult$ID[i]
                title=Go_gseresult$Description[i],pvalue_table = T)
      print(pp)
    })
    dev.off()
    pdf("./GSEA_results/KEGG_gseresult.pdf",height=10,width=12)
    lapply(1:nrow_all, function(i){
      pp<-gseaplot2(KEGG_gseresult,KEGG_gseresult$ID[i],
                title=KEGG_gseresult$Description[i],pvalue_table = T)
      print(pp)
    })
    dev.off()
    pdf("./GSEA_results/Go_Reactomeresult.pdf",height=10,width=12)
    lapply(1:nrow_all, function(i){
      pp<-gseaplot2(Go_Reactomeresult,Go_Reactomeresult$ID[i],
                title=Go_Reactomeresult$Description[i],pvalue_table = T)
      print(pp)
    })
    dev.off()
    

    使用msigdb数据库的基因集,需要先去msigdbGSEA | MSigDB (gsea-msigdb.org)官网下载基因集信息。

    genesets.png
    下载完的基因集保存在文件夹geneset_gmt中:
    geneset_gmt.png
    接着使用该数据库的基因集进行GSEA分析:
    #' msigdb数据库
    if(!is.null(gmt)){
    gmtfiles<-list.files(gmtpath)
    geneset<-read.gmt(paste0("./geneset_gmt/",gmt,".all.v2023.1.Hs.entrez.gmt"))
    egmt <- GSEA(geneList, TERM2GENE=geneset,
                   minGSSize = 1,
                   pvalueCutoff = 0.99,
                   verbose=FALSE)
    gsea_results_df <- egmt@result
    write.csv(gsea_results_df,file = paste0('./GSEA_results/',gmt,'_gsea_results_df.csv'))
    down<-gsea_results_df[gsea_results_df$pvalue<0.05 & gsea_results_df$enrichmentScore < -0.3,]
    down$group=-1
    up<-gsea_results_df[gsea_results_df$pvalue<0.05 & gsea_results_df$enrichmentScore > 0.3,]
    up$group=1
    pro=paste0('GSEA_',gmt)
    if(nrow(down)>0){
    nrow_down=ifelse(nrow_down<nrow(down),nrow_down,nrow(down))
    pdf(paste0('./GSEA_results/',pro,'_down.pdf'),height=10,width=12)
    lapply(1:nrow_down, function(i){
      pp<-gseaplot2(egmt,down$ID[i],
                title=down$Description[i],pvalue_table = T)
      print(pp)
    })
    dev.off()
    }
    if(nrow(up)>0){
    nrow_up=ifelse(nrow_up<nrow(up),nrow_up,nrow(up))
    pdf(paste0('./GSEA_results/',pro,'_up.pdf'),height=10,width=12)
    lapply(1:nrow_up, function(i){
      pp<-gseaplot2(egmt,up$ID[i],
                title=up$Description[i],pvalue_table = T)
      print(pp)
    })
    dev.off()
    }
    }
    }
    

    结果文件:


    results.png
    results.png
    • 2.GSVA分析
      加载用到的包:
    library(Seurat)
    library(tidyverse)
    library(ggplot2)
    library(ggsci)
    library(org.Hs.eg.db)
    library(clusterProfiler)
    library(GSVA)
    library(limma)
    

    函数代码:

    my_GSVA<-function(sc,nor="log",gmt="h",group_list_object="groups",barplot=T,sample_number=20){
    sc<-sc
    #' 数据预处理
    #sc<-readRDS("sc_umap.rds")
    if(nor=="log"){
    expr<-as.data.frame(sc$RNA@data)
    }else{
    expr<-as.data.frame(sc$SCT@counts)
    }
    expr$ID<-rownames(expr)
    s2e<-bitr(expr$ID,fromType="SYMBOL",toType="ENTREZID",OrgDb = "org.Hs.eg.db")
    expr<-inner_join(expr,s2e,by=c("ID"="SYMBOL"))
    rownames(expr)<-expr$ENTREZID
    expr<-expr[,-c((ncol(expr)-1),ncol(expr))]
    mycol<-pal_nejm()(8)
    geneset<-read.gmt(paste0("./geneset_gmt/",gmt,".all.v2023.1.Hs.entrez.gmt"))
    geneset_list<-split(geneset$gene,geneset$term) #' 通过geneset$term对geneset$gene进行分组
    expr<-as.matrix(expr)
    #' GSVA
    gsval<-gsva(expr,geneset_list,kcdf="Gaussian",method="gsva",parallel.sz=12)
    dir.create("./GSVA_results")
    save(gsval,file="./GSVA_results/gsval.rds")
    

    基于GSVA的差异分析:

    #' DEGSVA
    meta<-as.data.frame(sc@meta.data[,group_list_object,drop=F])
    mydata<-t(as.data.frame(gsval))%>%as.data.frame()
    mydata<-merge(meta,mydata,by.x=0,by.y=0)
    rownames(mydata)<-mydata$Row.names
    mydata<-mydata[,c(-1)] #' 只留下了groups列,删除了Row.names/orig.ident和seurat_clusters列
    mydata<-mydata%>%arrange(desc(groups)) #' 排序
    group_list=factor(mydata[,group_list_object])
    design<-model.matrix(~0+group_list)
    colnames(design)<-levels(group_list)
    rownames(design)<-rownames(mydata)
    contrast.matrix<-makeContrasts(paste0(rle(mydata[,group_list_object])$values[1],'-',rle(mydata[,group_list_object])$values[2]),levels=design)
    fit<-lmFit(t(mydata[,-1]),design=design)
    fit2<-contrasts.fit(fit,contrast.matrix)
    fit2<-eBayes(fit2)
    alldiff<-topTable(fit2,coef=1,n=Inf)
    #sigdiff<-subset(alldiff,abs(logFC)>0.5&adj.P.Val<0.05)
    

    绘制热图:

    #' 绘制热图
    alldiff<-alldiff[order(abs(alldiff$logFC),decreasing=T),]
    plotdata<-t(mydata[,rownames(alldiff[1:20,])])
    library(ComplexHeatmap)
    library(circlize)
    Groups<-mycol[c(1:2)]
    names(Groups)<-c(rle(mydata[,group_list_object])$values[1],rle(mydata[,group_list_object])$values[2])
    Top<-HeatmapAnnotation(Group=factor(mydata[,group_list_object]),
                            border=T,
                            col=list(Group=Groups),
                            show_annotation_name=T,
                            annotation_name_side="left",
                            annotation_name_gp=gpar(fontsize=10))
    pdf("./GSVA_results/GSVA_heatmap.pdf",height=10,width=12)
    pp<-Heatmap(plotdata,top_annotation=Top,cluster_rows=F,cluster_columns=F,border=T,
                    row_names_side="left",column_order=NULL,show_column_names=F,
                    column_split=c(rep(rle(mydata[,group_list_object])$values[1],rle(mydata[,group_list_object])$lengths[1]),rep(rle(mydata[,group_list_object])$values[2],rle(mydata[,group_list_object])$lengths[2])),
                    row_names_gp = gpar(fontsize = 8))
    print(pp)
    dev.off()
    

    绘制条形图:

    if(barplot&gmt=="h"){
    library(stringr)
    library(ggplot2)
    library(ggprism)
    library(ggthemes)
    Diff<-alldiff
    Diff_sample<-sample_n(Diff,sample_number, replace = FALSE)
    dat_plot <- data.frame(id = row.names(Diff_sample),t = Diff_sample$t)
    dat_plot$id <- str_replace(dat_plot$id , "HALLMARK_","")
    dat_plot$threshold = factor(ifelse(dat_plot$t  >-2, ifelse(dat_plot$t >= 2 ,'Up','Stable'),'Down'),levels=c('Up','Down','Stable'))
    dat_plot <- dat_plot %>% arrange(t)
    dat_plot$id <- factor(dat_plot$id,levels = dat_plot$id)
    p <- ggplot(data = dat_plot,aes(x = id,y = t,fill = threshold)) +  geom_col()+  coord_flip() +  scale_fill_manual(values = c('Up'= '#36638a','NoSignifi'='#cccccc','Down'='#7bcd7b')) +  geom_hline(yintercept = c(-2,2),             color = 'white',             size = 0.5,lty='dashed') +  xlab('') +   ylab(paste0('t value of GSVA score,',rle(mydata[,group_list_object])$values[1],'-VS-',rle(mydata[,group_list_object])$values[2]) )+ guides(fill=F)+theme_prism(border = T) +  theme(    axis.text.y = element_blank(),    axis.ticks.y = element_blank())
    low1 <- dat_plot %>% filter(t < -2) %>% nrow()
    #low0 <- dat_plot %>% filter( t < 0) %>% nrow()
    #high0 <- dat_plot %>% filter(t>0)%>%filter(t<2) %>% nrow()
    low0 <- dat_plot %>% filter(t>-2)%>%filter( t < 2) %>% nrow()
    high1 <- nrow(dat_plot)
    pdf("./GSVA_results/GSVA_barplot.pdf",height=10,width=12)
    pp<-p + geom_text(data = dat_plot[1:low1,],aes(x = id,y = 0.1,label = id), hjust = 0,color = 'black',size = 2.5) + geom_text(data = dat_plot[(low1 +1):(low1+low0),],aes(x = id,y = 0.1,label = id), hjust = 0,color = 'grey',size = 2.5) +geom_text(data = dat_plot[(low1+low0+1):high1,],aes(x = id,y = -0.1,label = id), hjust = 1,color = 'black',size = 2.5)
    print(pp)
    dev.off()
    }
    

    结果文件:


    results.png
    heatmap.png
    barplot.png

    总结:再见单细胞。

    相关文章

      网友评论

          本文标题:GSEA&GSVA分析-单细胞转录组高阶分析03

          本文链接:https://www.haomeiwen.com/subject/gikcbdtx.html