美文网首页R语言作业GO, KEGG, DO富集分析富集分析
【生信技能树】2020-01-08作业:GEO挖掘标准流程分析G

【生信技能树】2020-01-08作业:GEO挖掘标准流程分析G

作者: 猫叽先森 | 来源:发表于2020-01-10 17:39 被阅读0次

    题目出自【生信技能树】公众号文章:2011年的表达芯片分析和2019年的区别
    要求:走GEO标准分析流程分析GSE27447


    代码主要复制于Jimmy大佬的github:https://github.com/jmzeng1314/GEO/tree/master/GSE42872_main
    主要步骤:
    step1 - 读入GSE27447数据
    step2 - 检查表达矩阵
    step3 - 差异分析
    step4 - GO/KEGG数据库注释
    step5 - GSEA基因集富集分析(施工中)
    step6 - GSVA基因集变异分析(施工中)


    step1 - 读入GSE27447数据

    rm(list=ls())
    options(stringsAsFactors = F)
    
    library(AnnoProbe) # 生信技能树出品的GEO数据下载利器
    library(Biobase)
    gset <- geoChina("GSE27447")
    gse27447 <- gset[[1]]
    exprSet <- exprs(gse27447)
    boxplot(exprSet,las=2)
    
    boxplot_before

    根据boxplot可以得出,原始数据需要log2处理。

    exprSet <- log2(exprSet+1)
    boxplot(exprSet,las=2)
    
    boxplot_after_log2
    exprSet[1:4,1:4]
    #        GSM678364 GSM678365 GSM678366 GSM678367
    #7892501  3.221012  4.502222  1.271969  1.849167
    #7892502  6.909593  7.305615  6.956150  7.078983
    #7892503  5.125911  8.659878  5.395697  5.660467
    #7892504 11.045603 10.060453 10.845937 11.145225
    

    #行名是探针,列名是样本,中间的数据是某样本中某探针的表达量。

    gse27447@annotation
    #[1] "GPL6244"
    checkGPL(gse27447@annotation)
    #[1] TRUE 
    

    # checkGPL()结果是TRUE说明AnnoProbe包中存在"GPL6244" 平台数据,于是可以使用另外两个超级厉害的函数idmap()filterEM(),得到探针对应的基因名,然后把表达矩阵的探针名转换为基因名。

    ids <- idmap(gse27447@annotation)
    dat <- filterEM(exprSet,ids)
    dim(dat)
    #[1] 18837    19
    dat <- dat[order(rownames(dat)),]
    

    获取临床信息,从中进一步获取分组信息

    pd <- pData(gse27447)
    library(stringr)
    group_list=str_split(pd$title,' ',simplify = T)[,1]
    table(group_list)
    #group_list
    #non-triple     triple 
    #        14          5 
    

    所以是5个三阴乳腺癌样本,14个非三阴乳腺癌样本。
    把表达矩阵和分组信息保存到Rdata文件。

    save(dat,group_list,file = 'step1-output.Rdata')
    

    step2 - 检查表达矩阵

    rm(list = ls())  ##清空Enviroment
    options(stringsAsFactors = F)
    load('step1-output.Rdata') ##读入第一步得到的数据,然后检查数据
    
    table(group_list)
    #group_list
    #non-triple     triple 
    #        14          5 
    dat[1:4,1:4]
    #        GSM678364 GSM678365 GSM678366 GSM678367
    #A1CF     6.654092  6.074542  6.258994  6.609746
    #A2M     10.266259 11.034813 10.494956 11.450283
    #A2ML1    6.375017  6.118954  5.683851  4.278468
    #A3GALT2  4.424096  4.144055  4.906982  5.065266
    

    2.1 样本相关性分析

    dim(dat)
    #[1] 18837    19
    ac=data.frame(groups=group_list)
    rownames(ac)=colnames(dat) #把ac的行名给到n的列名,即对每一个探针标记上分组信息
    pheatmap(cor(dat),annotation_col = ac)
    
    heatmap - cor.png

    2.2 主成分分析(PCA)

    # 我们是对样本做主成分分析,所以要求行名是样本名,列名是探针(基因)名,所以需要对表达矩阵进行转置

    dat=t(dat) 
    dat=as.data.frame(dat)
    dat=cbind(dat,group_list) ##给表达矩阵加上分组信息
    
    library("FactoMineR")
    library("factoextra") 
    dat.pca <- PCA(dat[,-ncol(dat)], graph = FALSE)#dat最后一列是group_list。pca分析需要一个纯数值矩阵,所以将dat最后一列去掉以后赋值给dat.pca
    (fviz_pca_ind(dat.pca,geom.ind = "point",
                 col.ind = dat$group_list,
                 palette = c("#00AFBB", "#E7B800"),
                 addEllipses = TRUE,
                 legend.title = "Groups"
    ))
    ggsave('all_samples_PCA.png')
    
    all_samples_PCA.png

    可以看到,两组样本并没有分开╮(╯▽╰)╭
    不过大家都是肿瘤样本,相似性较高也可以理解。

    2.3 取sd最大的1000个基因画热图

    rm(list = ls())  ## 魔幻操作,一键清空~
    load(file = 'step1-output.Rdata')
    dat[1:4,1:4] 
    cg=names(tail(sort(apply(dat,1,sd)),1000))
    

    ##使用 apply()函数计算获取表达矩阵dat每行(即每个基因表达量)的方差,从小到大排序后,取最大的1000个方差,获取其对应的基因名,赋值给变量cg
    ##然后就可以对这1000个基因画热图

    library(pheatmap)
    pheatmap(dat[cg,],show_colnames =F,show_rownames = F)
    
    heatmap - cg.png

    优化一下,比如对dat[cg,]归一化:

    n=t(scale(t(dat[cg,]))) 
    n[n>2]=2 
    n[n< -2]= -2
    n[1:4,1:4]
    pheatmap(n,show_colnames =F,show_rownames = F)
    
    heatmap - n.png

    给样本添加分组信息:

    ac=data.frame(g=group_list)
    rownames(ac)=colnames(n) #把ac的行名给到n的列名,即对每一个样本标记上分组信息
    (heatmap <- pheatmap(n,show_colnames =F,show_rownames = F,
                        annotation_col=ac))
    library(ggplot2)
    ggsave(filename = 'heatmap_top1000_sd.png',heatmap)
    
    heatmap_top1000_sd.png

    step3 - 差异分析

    3.1 使用limma包做差异分析

    参考:【生信菜鸟团】用limma对芯片数据做差异分析

    rm(list = ls())  ## 魔幻操作,一键清空~
    options(stringsAsFactors = F)
    load(file = 'step1-output.Rdata') ##载入数据
    dat[1:4,1:4] 
    #        GSM678364 GSM678365 GSM678366 GSM678367
    #A1CF     6.654092  6.074542  6.258994  6.609746
    #A2M     10.266259 11.034813 10.494956 11.450283
    #A2ML1    6.375017  6.118954  5.683851  4.278468
    #A3GALT2  4.424096  4.144055  4.906982  5.065266
    group_list[group_list=='non-triple'] <- 'non_triple'
    table(group_list) 
    #group_list
    #non_triple     triple 
    #        14          5 
    

    使用箱线图boxplot肉眼观察一下前几个基因的数据分布。

    boxplot(unlist(dat[1,])~group_list) 
    
    boxplot - gene-1.png
    自定义一个函数,用ggpubr包画漂亮点的boxplot
    library(ggpubr)
    bp=function(g){         #定义一个函数g,函数为{}里的内容
      df=data.frame(gene=g,stage=group_list)
      p <- ggboxplot(df, x = "stage", y = "gene",
                     color = "stage", palette = "jco",
                     add = "jitter")
      p + stat_compare_means()
    }
    p1 <- bp(as.numeric(dat[1,]))
    p2 <- bp(as.numeric(dat[2,]))
    p3 <- bp(as.numeric(dat[3,]))
    p4 <- bp(as.numeric(dat[4,]))
    
    library(patchwork)
    (bp_4 <- p1|p2|p3|p4)
    
    boxplot - 4gene.png

    以上也只属于check data,下面开始真正的差异分析

    library(limma)
    
    design <- model.matrix(~0+factor(group_list))
    colnames(design)=levels(factor(group_list))
    exprSet=dat
    rownames(design)=colnames(exprSet)
    contrast.matrix<-makeContrasts("triple-non_triple",levels = design)
    contrast.matrix ##这个矩阵声明,我们要把 triple 组跟 non_triple 进行差异分析比较
    #            Contrasts
    #Levels       triple-non_triple
    #  non_triple                -1
    #  triple                     1
    

    自定义函数DEG,功能是做差异分析。

    DEG <- function(exprSet,design,contrast.matrix){
      fit <- lmFit(exprSet,design)
      fit2 <- contrasts.fit(fit, contrast.matrix) 
      fit2 <- eBayes(fit2) 
      tempOutput = topTable(fit2, coef=1, n=Inf)
      nrDEG = na.omit(tempOutput) 
      return(nrDEG)
    }
    

    把数据喂给DEG函数,获取差异分析结果,并保存到Rdata

    deg <- DEG(exprSet,design,contrast.matrix)
    head(deg)
    #            logFC  AveExpr         t      P.Value  adj.P.Val        B
    #ARFGEF3 -1.933996 9.224776 -5.893853 8.360630e-06 0.06003476 2.930433
    #NEK11   -1.644951 6.944406 -5.882179 8.582134e-06 0.06003476 2.911194
    #HOXB3   -1.459656 7.038370 -5.696430 1.303732e-05 0.06003476 2.601988
    #EGR2     2.140287 6.380807  5.680934 1.350247e-05 0.06003476 2.575932
    #C3orf14 -2.110725 6.862692 -5.555574 1.794713e-05 0.06003476 2.363696
    # DNAH5   -3.742125 5.909818 -5.478036 2.141867e-05 0.06003476 2.231157
    save(deg,file = 'deg.Rdata')
    

    手动检查deg前几个基因表达情况

    bp(as.numeric(dat[rownames(dat)=='ARFGEF3',]))
    
    ARFGEF3_exprData.png

    可以看到,与non-triple组比较,triple组表达ARFGEF3显著下调,与deg结果一致。

    3.2 差异分析结果的可视化

    3.2.1 火山图

    设置P.ValuelogFC的阈值,将基因分为up,down,stable三类

    df <- deg
    colnames(deg)
    df$v <- -log10(df$P.Value)
    p_thred <- 0.05
    logFC_thred <- 1.5
    df$groups = ifelse(df$P.Value > p_thred, "stable", 
                  ifelse(df$logFC > logFC_thred, "up", 
                    ifelse(df$logFC < -logFC_thred, "down", 
                                     "stable")))
    table(df$groups)
    #  down stable     up 
    #   164  18554    119 
    library(ggplot2)
    this_tile <- paste0('Cutoff for logFC is ',round(logFC_thred,3),
                        '\nThe number of up gene is ',nrow(df[df$groups =='up',]) ,
                        '\nThe number of down gene is ',nrow(df[df$groups =='down',])
    )
    p <- ggplot(data = df, aes(x = logFC, y = v)) +
      geom_point(alpha=0.4, size=1.75, 
                 aes(color=groups)) +
      scale_color_manual(values=c("blue", "grey","red")) +
      geom_vline(xintercept=c(-logFC_thred,logFC_thred),lty=4,col="black",lwd=0.8) +
      geom_hline(yintercept = -log10(p_thred),lty=4,col="black",lwd=0.8) +
      labs(x="logFC",y="-log10(P.value)")+
      ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
      theme_bw()
    print(p)
    
    volcano.png
    3.2.2 热图
      load(file = 'step1-output.Rdata')
      dat[1:4,1:4]
    #        GSM678364 GSM678365 GSM678366 GSM678367
    #A1CF     6.654092  6.074542  6.258994  6.609746
    #A2M     10.266259 11.034813 10.494956 11.450283
    #A2ML1    6.375017  6.118954  5.683851  4.278468
    #A3GALT2  4.424096  4.144055  4.906982  5.065266
      table(group_list)
    #group_list
    #non-triple     triple 
    #        14          5
    x=deg$logFC #deg取logFC这列并将其重新赋值给x
    names(x)=rownames(deg) #deg取probe_id这列,并将其作为名字给x
    cg=c(names(head(sort(x),100)),#对x进行从小到大排列,取前100及后100,并取其对应的探针名,作为向量赋值给cg
           names(tail(sort(x),100)))
    
    n=t(scale(t(dat[cg,])))
    n[n>2]=2
    n[n< -2]= -2
    
    ac=data.frame(groups=group_list)
    rownames(ac)=colnames(n) 
    pheatmap(n,show_colnames =F,show_rownames = F,
               annotation_col=ac) 
    
    heatmap - deg - top200.png

    step4 - GO/KEGG数据库注释

    参考:【生信技能树】公众号文章:为R包写一本书(像Y叔致敬)

    4.0 整理数据

    设置P.Value和logFC的阈值,将基因分为UPDOWNstable3组, deg矩阵新建一列g储存基因分组信息。

    不同的阈值,筛选到的差异基因数量就不一样,后面的超几何分布检验结果就大相径庭。

    rm(list = ls())  ## 魔幻操作,一键清空~
    load(file = 'deg.Rdata')
    head(deg)
    #            logFC  AveExpr         t      P.Value  adj.P.Val        B
    #ARFGEF3 -1.933996 9.224776 -5.893853 8.360630e-06 0.06003476 2.930433
    #NEK11   -1.644951 6.944406 -5.882179 8.582134e-06 0.06003476 2.911194
    #HOXB3   -1.459656 7.038370 -5.696430 1.303732e-05 0.06003476 2.601988
    #EGR2     2.140287 6.380807  5.680934 1.350247e-05 0.06003476 2.575932
    #C3orf14 -2.110725 6.862692 -5.555574 1.794713e-05 0.06003476 2.363696
    #DNAH5   -3.742125 5.909818 -5.478036 2.141867e-05 0.06003476 2.231157
    p_thred <- 0.05
    logFC_thred <- 1.5
    deg$g=ifelse(deg$P.Value>p_thred,'stable',
                 ifelse( deg$logFC > logFC_thred,'UP',
                         ifelse( deg$logFC < -logFC_thred,'DOWN','stable') )
    )
    table(deg$g)
    #  DOWN stable     UP 
    #   164  18554    119 
    head(deg)
    #            logFC  AveExpr         t      P.Value  adj.P.Val        B      g
    #ARFGEF3 -1.933996 9.224776 -5.893853 8.360630e-06 0.06003476 2.930433   DOWN
    #NEK11   -1.644951 6.944406 -5.882179 8.582134e-06 0.06003476 2.911194   DOWN
    #HOXB3   -1.459656 7.038370 -5.696430 1.303732e-05 0.06003476 2.601988 stable
    #EGR2     2.140287 6.380807  5.680934 1.350247e-05 0.06003476 2.575932     UP
    #C3orf14 -2.110725 6.862692 -5.555574 1.794713e-05 0.06003476 2.363696   DOWN
    #DNAH5   -3.742125 5.909818 -5.478036 2.141867e-05 0.06003476 2.231157   DOWN
    

    富集分析需要基因的ENTREZID,使用Y叔神作clusterProfiler包里的bitr()函数获取对应关系,deg矩阵转存为DEG,并加上ENTREZID

    deg$SYMBOL=rownames(deg)
    library(ggplot2)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    df <- bitr(unique(deg$SYMBOL), fromType = "SYMBOL",
               toType = c( "ENTREZID"),
               OrgDb = org.Hs.eg.db)
    head(df)
    #   SYMBOL ENTREZID
    #1 ARFGEF3    57221
    #2   NEK11    79858
    #3   HOXB3     3213
    #4    EGR2     1959
    #5 C3orf14    57415
    #6   DNAH5     1767
    DEG=deg
    head(DEG)
    #            logFC  AveExpr         t      P.Value  adj.P.Val        B      g  SYMBOL
    #ARFGEF3 -1.933996 9.224776 -5.893853 8.360630e-06 0.06003476 2.930433   DOWN ARFGEF3
    #NEK11   -1.644951 6.944406 -5.882179 8.582134e-06 0.06003476 2.911194   DOWN   NEK11
    #HOXB3   -1.459656 7.038370 -5.696430 1.303732e-05 0.06003476 2.601988 stable   HOXB3
    #EGR2     2.140287 6.380807  5.680934 1.350247e-05 0.06003476 2.575932     UP    EGR2
    #C3orf14 -2.110725 6.862692 -5.555574 1.794713e-05 0.06003476 2.363696   DOWN C3orf14
    #DNAH5   -3.742125 5.909818 -5.478036 2.141867e-05 0.06003476 2.231157   DOWN   DNAH5
    
    DEG=merge(DEG,df,by='SYMBOL')
    head(DEG)
    #   SYMBOL       logFC   AveExpr           t    P.Value adj.P.Val         B      g ENTREZID
    #1    A1CF -0.24279450  6.481373 -0.78084875 0.44383564 0.8444129 -5.353833 stable    29974
    #2     A2M  0.74751848 11.069348  1.89303707 0.07258966 0.5827869 -4.132066 stable        2
    #3   A2ML1  0.99851595  4.933066  0.92880174 0.36382423 0.8085547 -5.242500 stable   144568
    #4 A3GALT2  0.03320466  4.269750  0.08468268 0.93333708 0.9894294 -5.625240 stable   127550
    #5  A4GALT -0.28893317  6.883433 -0.71439103 0.48305962 0.8637983 -5.397984 stable    53947
    #6   A4GNT  0.43189200  6.202285  1.60275087 0.12432137 0.6497896 -4.528644 stable    51146
    save(DEG,file = 'anno_DEG.Rdata')
    

    分别取出上调基因和下调基因,合并为差异基因

    gene_up <- DEG[DEG$g == 'UP','ENTREZID'] 
    gene_down <- DEG[DEG$g == 'DOWN','ENTREZID'] 
    gene_diff <- c(gene_up,gene_down)
    gene_all <- as.character(DEG[ ,'ENTREZID'] )
    
    geneList <- DEG$logFC
    names(geneList) <- DEG$ENTREZID
    geneList <- sort(geneList,decreasing = T)
    

    4.1 KEGG

    4.1.1 上调基因集
    library(ggplot2)
    library(clusterProfiler)
    kk.up <- enrichKEGG(gene         = gene_up,
                          organism     = 'hsa',
                          universe     = gene_all,
                          pvalueCutoff = 0.9,
                          qvalueCutoff = 0.9)
    head(kk.up)[,1:6]
    #               ID                       Description GeneRatio  BgRatio       pvalue    p.adjust
    #hsa04640 hsa04640        Hematopoietic cell lineage      7/53  88/7130 3.334829e-06 0.000506894
    #hsa04662 hsa04662 B cell receptor signaling pathway      6/53  78/7130 2.152723e-05 0.001636069
    #hsa04062 hsa04062       Chemokine signaling pathway      6/53 175/7130 1.769682e-03 0.081549397
    #hsa05340 hsa05340          Primary immunodeficiency      3/53  35/7130 2.146037e-03 0.081549397
    #hsa04921 hsa04921        Oxytocin signaling pathway      5/53 147/7130 4.506704e-03 0.133247928
    #hsa04064 hsa04064      NF-kappa B signaling pathway      4/53  95/7130 5.259787e-03 0.133247928
    

    KEGG分析上调基因集结果可视化:
    1)上调基因所属信号通路(气泡图)

    dotplot(kk.up)
    
    kk.up.dotplot.png

    2)查看第一个结果hsa04640的信号通路示意图:

    browseKEGG(kk.up, 'hsa04640')
    
    KEGG_hsa04640.png

    3)上调基因所属信号通路(条带图)

    (gg_barplot <- barplot(kk.up,showCategory=20))
    
    KEGG_up_barplot.png

    4)通路与基因之间的关系可视化:

    cnetplot(kk.up, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)  
    
    KEGG_up_cnetplot.png

    5)通路与通路之间的关系图:

    emapplot(kk.up)
    
    KEGG_up_emapplot.png

    6)热图展现通路与基因之间的关系:

    heatplot(kk.up)
    
    KEGG_up_heatplot.png
    4.1.2 下调基因集

    同样的方法计算下调基因集KEGG

    kk.down <- enrichKEGG(gene         =  gene_down,
                                            organism     = 'hsa',
                                            universe     = gene_all,
                                            pvalueCutoff = 0.9,
                                            qvalueCutoff = 0.9)
      head(kk.down)[,1:6]
    #                 ID                      Description GeneRatio  BgRatio       pvalue  p.adjust
    #hsa04915 hsa04915       Estrogen signaling pathway      6/62 130/7130 0.0008719172 0.1464821
    #hsa00910 hsa00910              Nitrogen metabolism      2/62  16/7130 0.0082547476 0.4221930
    #hsa04921 hsa04921       Oxytocin signaling pathway      5/62 147/7130 0.0087691281 0.4221930
    #hsa04934 hsa04934                 Cushing syndrome      5/62 152/7130 0.0100522154 0.4221930
    #hsa04360 hsa04360                    Axon guidance      5/62 177/7130 0.0184322243 0.4366278
    #hsa04927 hsa04927 Cortisol synthesis and secretion      3/62  65/7130 0.0186643448 0.4366278
    dotplot(kk.down );ggsave('kk.down.dotplot.png')
    
    kk.down.dotplot.png
    4.1.2 差异基因集
      kk.diff <- enrichKEGG(gene         = gene_diff,
                            organism     = 'hsa',
                            pvalueCutoff = 0.05)
      head(kk.diff)[,1:6]
      dotplot(kk.diff );ggsave('kk.diff.dotplot.png')
    
    kk.diff.dotplot.png

    合并上下调基因后,几乎分析不出信号通路。╮(╯▽╰)╭

    4.1.3 Pathway Enrichment
    kegg_diff_dt <- as.data.frame(kk.diff)
    kegg_down_dt <- as.data.frame(kk.down)
    kegg_up_dt <- as.data.frame(kk.up)
    down_kegg<-kegg_down_dt[kegg_down_dt$pvalue<0.05,];down_kegg$group=-1
    up_kegg<-kegg_up_dt[kegg_up_dt$pvalue<0.05,];up_kegg$group=1
    
    kegg_plot <- function(up_kegg,down_kegg){
        dat=rbind(up_kegg,down_kegg)
        colnames(dat)
        dat$pvalue = -log10(dat$pvalue)
        dat$pvalue=dat$pvalue*dat$group 
        
        dat=dat[order(dat$pvalue,decreasing = F),]
        
        g_kegg<- ggplot(dat, aes(x=reorder(Description,order(pvalue, decreasing = F)), y=pvalue, fill=group)) + 
          geom_bar(stat="identity") + 
          scale_fill_gradient(low="blue",high="red",guide = FALSE) + 
          scale_x_discrete(name ="Pathway names") +
          scale_y_continuous(name ="log10P-value") +
          coord_flip() + theme_bw()+theme(plot.title = element_text(hjust = 0.5))+
          ggtitle("Pathway Enrichment") 
    }
     
    g_kegg=kegg_plot(up_kegg,down_kegg)
    print(g_kegg)
    ggsave(g_kegg,filename = 'kegg_up_down.png')
    
    kegg_up_down.png
    这张图其实就是上调基因和下调基因pathway条带图合并,可以看到Oxytocin signaling pathway同时存在于上调基因和下调基因pathway,刚好跟差异基因集的分析结果一样。
    4.1.4 GSEA
    kk_gse <- gseKEGG(geneList     = geneList,
                        organism     = 'hsa',
                        nPerm        = 1000,
                        minGSSize    = 120,
                        pvalueCutoff = 0.9,
                        verbose      = FALSE)
    head(kk_gse)[,1:6]
    gseaplot(kk_gse, geneSetID = rownames(kk_gse[1,]))
    
    gseaplot.png
    down_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore < 0,];down_kegg$group=-1
    up_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore > 0,];up_kegg$group=1
      
    g_kegg=kegg_plot(up_kegg,down_kegg)
    print(g_kegg)
    ggsave(g_kegg,filename = 'kegg_up_down_gsea.png')
    
    kegg_up_down_gsea.png

    4.2 GO

    g_list=list(gene_up=gene_up,
                gene_down=gene_down,
                gene_diff=gene_diff)
      
    go_enrich_results <- lapply( g_list , function(gene) {
          lapply( c('BP','MF','CC') , function(ont) {
          cat(paste('Now process ',ont ))
          ego <- enrichGO(gene          = gene,
                          universe      = gene_all,
                          OrgDb         = org.Hs.eg.db,
                          ont           = ont ,
                          pAdjustMethod = "BH",
                          pvalueCutoff  = 0.99,
                          qvalueCutoff  = 0.99,
                          readable      = TRUE)
            
          print( head(ego) )
          return(ego)
        })
    })
    save(go_enrich_results,file = 'go_enrich_results.Rdata')
    
    load(file = 'go_enrich_results.Rdata')
    n1= c('gene_up','gene_down','gene_diff')
    n2= c('BP','MF','CC') 
    for (i in 1:3){
      for (j in 1:3){
        fn=paste0('dotplot_',n1[i],'_',n2[j],'.png')
        cat(paste0(fn,'\n'))
        png(fn,res=150,width = 1080)
        dotplot(go_enrich_results[[i]][[j]],title=paste0('dotplot_',n1[i],'_',n2[j])) %>% print()
        dev.off()
      }
    }
    
    dotplot_gene_up_BP.png
    dotplot_gene_up_MF.png
    dotplot_gene_up_CC.png dotplot_gene_down_BP.png
    dotplot_gene_down_MF.png
    dotplot_gene_down_CC.png dotplot_gene_diff_BP.png
    dotplot_gene_diff_MF.png
    dotplot_gene_diff_CC.png

    未完待续……

    step5 - GSEA基因集富集分析

    step6 - GSVA基因集变异分析

    相关文章

      网友评论

        本文标题:【生信技能树】2020-01-08作业:GEO挖掘标准流程分析G

        本文链接:https://www.haomeiwen.com/subject/nxnuactx.html