美文网首页
2021-07-09:细数我在GO注释时遇到的那些报错

2021-07-09:细数我在GO注释时遇到的那些报错

作者: wangyantao1991 | 来源:发表于2021-07-10 09:39 被阅读0次

    上回说到,我废了九牛二虎之力,终于拿到了百十来个GB的GO EVIDENCE 数据,你猜怎么着,接着往下进行的时候发现,R语言根本读不了。。。。心疼自己一秒钟

    来吧,今天接着搞起:

    library(GOstats)

    ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL)

    #报错

    > colnames(ago) <- c('gene_id','go_id','evi')

    Error in names(x) <- value :

      'names' attribute [3] must be the same length as the vector [1]

    之前我用write.csv生成的EVIDENCE注释文件,在读取时,列与列之间多了',',因此在读入时加了sep=',',而这次列与列之间是空格,同理,要加入sep=' '。

    ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sme',row.names = NULL,sep=' ')

    成功读入,进入下一步

    goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))

    goFrame <- GOFrame(goframeData)

    goAllFrame <- GOAllFrame(goFrame)

    library('GSEABase')

    gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())

    universe <- as.vector(ago$gene_id)

    DEGfile <- read.csv('xxxxxx.csv',header = T,row.names = NULL)

    #读入差异表达基因#

    genes = as.vector(DEGfile$gene_id)

    params <- GSEAGOHyperGParams(name = 'custom',

                                geneSetCollection = gsc,

                                geneIds = genes,

                                universeGeneIds = universe,

                                ontology = 'CC',

                                pvalueCutoff = 0.05,

                                conditional = FALSE,

                                testDirection = 'over')

    Over <- hyperGTest(params)

    write.csv(summary(Over),xxxxxx_GO_CC_hyper.csv')

    问题很多:

    1、EVIDENCE注释文件的gene_id与DESeq2差异表达的文件中的gene_id要保持一致

    sed "s/.1 / /g" swiss_goev_sme > swiss_goev_sme.1

    终于成功生成GO注释文件,以下为全部脚本

    > library(GOstats)

    > ago <- read.delim('/vol3/agis/zhoushaoqun_group/wangyantao/GO/swiss_goev_sly.1',row.names = NULL,sep=' ')

    > colnames(ago) <- c('gene_id','go_id','evi')

    > goframeData <- na.omit(data.frame(ago$go_id,ago$evi,ago$gene_id))

    > goFrame <- GOFrame(goframeData)

    > goAllFrame <- GOAllFrame(goFrame)

    There were 21 warnings (use warnings() to see them)

    > library('GSEABase')

    Loading required package: annotate

    Loading required package: XML

    Attaching package: ‘XML’

    The following object is masked from ‘package:graph’:

        addNode

    Warning messages:

    1: package ‘GSEABase’ was built under R version 4.0.3

    2: package ‘annotate’ was built under R version 4.0.3

    > gsc <- GeneSetCollection(goAllFrame,setType = GOCollection())

    > universe <- as.vector(ago$gene_id)

    > DEGfile <- read.csv('/vol3/agis/zhoushaoqun_group/wangyantao/GO/diffgene_Sly48.48.0_deseq2.csv',header = T,row.names = NULL)

    > genes = as.vector(DEGfile[,1])

    > params <- GSEAGOHyperGParams(name = 'custom',

    +                              geneSetCollection = gsc,

    +                              geneIds = genes,

    +                              universeGeneIds = universe,

    +                              ontology = 'CC',

    +                              pvalueCutoff = 0.05,

    +                              conditional = FALSE,

    +                              testDirection = 'over')

                                  geneSetCollection = gsc,

                                  geneIds = genes,

                                  universeGeneIds = universe,

                                  ontology = 'MF',

                                  pvalueCutoff = 0.05,

                                  conditional = FALSE,

                                  testDirection = 'over')

    Warning messages:

    1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

    2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

    > params1 <- GSEAGOHyperGParams(name = 'custom',

    +                              geneSetCollection = gsc,

    +                              geneIds = genes,

    +                              universeGeneIds = universe,

    +                              ontology = 'BP',

    +                              pvalueCutoff = 0.05,

    +                              conditional = FALSE,

    +                              testDirection = 'over')

    Warning messages:

    1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

    2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

    > params2 <- GSEAGOHyperGParams(name = 'custom',

    +                              geneSetCollection = gsc,

    +                              geneIds = genes,

    +                              universeGeneIds = universe,

    +                              ontology = 'MF',

    +                              pvalueCutoff = 0.05,

    +                              conditional = FALSE,

    +                              testDirection = 'over')

    Warning messages:

    1: In makeValidParams(.Object) : removing duplicate IDs in universeGeneIds

    2: In makeValidParams(.Object) : removing geneIds not in universeGeneIds

    >

    > Over <- hyperGTest(params)

    > Over1 <- hyperGTest(params1)

    > Over2 <- hyperGTest(params2)

    > write.csv(summary(Over),'Sly48_GO_CC_hyper.csv')

    > write.csv(summary(Over1),'Sly48_GO_BP_hyper.csv')

    > write.csv(summary(Over2),'Sly48_GO_MF_hyper.csv')

    > library(ggplot2)

    > library(RColorBrewer)

    Warning message:

    package ‘RColorBrewer’ was built under R version 4.0.3

    > display.brewer.all()

    > col3 <- brewer.pal(4,'Set3')[c(1,3,4)]

    > cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)

    > bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)

    > mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)

    > cc2 <- cc[cc$Pvalue<1e-5,]

    > bp2 <- bp[bp$Pvalue<1e-5,]

    > mf2 <- mf[mf$Pvalue<1e-5,]

    > colnames(cc2)[2] <- c('GOID')

    > colnames(bp2)[2] <- c('GOID')

    > colnames(mf2)[2] <- c('GOID')

    > data <- rbind(mf2,cc2)

    > data <- rbind(data,bp2)

    > data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))

    > p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+

    +  geom_bar(stat = 'identity')+coord_flip()+

    +  scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+

    +  theme_bw()+theme(legend.title=element_blank(),

    +                    axis.text = element_text(size = 10),

    +                    axis.title.x = element_text(size = rel(1.5)),

    +                    axis.title.y = element_text(size = rel(1.5),angle = 90),

    +                    legend.text = element_text(size = rel(1.5)))+

    +  ylab('Number of Genes (log2)') + xlab('Terms')

    > ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)

    >

    > display.brewer.all()

    > col3 <- brewer.pal(4,'Set3')[c(1,3,4)]

    > cc = read.csv('Sly48_GO_CC_hyper.csv',header = T,row.names = NULL)

    > bp = read.csv('Sly48_GO_BP_hyper.csv',header = T,row.names = NULL)

    > mf = read.csv('Sly48_GO_MF_hyper.csv',header = T,row.names = NULL)

    > cc2 <- cc[cc$Pvalue<1e-2,]

    > bp2 <- bp[bp$Pvalue<1e-2,]

    > mf2 <- mf[mf$Pvalue<1e-2,]

    > colnames(cc2)[2] <- c('GOID')

    > colnames(bp2)[2] <- c('GOID')

    > colnames(mf2)[2] <- c('GOID')

    > data <- rbind(mf2,cc2)

    > data <- rbind(data,bp2)

    > data2 <- data.frame(data,type = c(rep('MF',dim(mf2)[1]),rep('CC',dim(cc2)[1]),rep('BP',dim(bp2)[1])))

    > p <- ggplot(data2,aes(x = Term,y = log2(Count),fill = type,order = type))+

    +  geom_bar(stat = 'identity')+coord_flip()+

    +  scale_fill_manual(values = col3,limits = c('BP','CC','MF'),labels = c('BP','CC','MF'))+

    +  theme_bw()+theme(legend.title=element_blank(),

    +                    axis.text = element_text(size = 10),

    +                    axis.title.x = element_text(size = rel(1.5)),

    +                    axis.title.y = element_text(size = rel(1.5),angle = 90),

    +                    legend.text = element_text(size = rel(1.5)))+

    +  ylab('Number of Genes (log2)') + xlab('Terms')

    > ggsave('Sly48.go_hyper.tiff',p,device = 'tiff',scale = 1,width = 20,height = 30,dpi = 300)

    一开始的时候,pvalue设置为10的-5次方,结果并没有生产图片,后面我把pvalue改成10的-2次方,好歹有个结果。

    经过测试,以上脚本可用,开始批量处理相关物种差异表达的GO注释

    相关文章

      网友评论

          本文标题:2021-07-09:细数我在GO注释时遇到的那些报错

          本文链接:https://www.haomeiwen.com/subject/etzxpltx.html