美文网首页生信
将salmon生成的数据导入R语言做DESeq2差异分析

将salmon生成的数据导入R语言做DESeq2差异分析

作者: FANHONGZENG | 来源:发表于2021-10-24 19:22 被阅读0次

用salmon走完RNA-Seq上游分析后,就要把数据导入R里做下游分析

使用R包tximport导入数据

library(tximport)
library(GenomicFeatures)
txd<-makeTxDbFromGFF(file='../RNAseq/Arabidopsis/project/AtRTDv2_QUASI_19April2016.gtf',format='gtf',dataSource='AtRTDv2_QUASI_19April2016.gtf',organism ='Arabidopsis thaliana')  ##导入gtf文件
str(txd)
keytypes(txd)
txTogene<-AnnotationDbi::select(txd,keys =keys(txd,'TXNAME'),keytype='TXNAME',columns=c('TXNAME','GENEID'))
head(txTogene)
data<-c('SRR3581878_output/quant.sf','SRR3581712_output/quant.sf','SRR3581836_output/quant.sf','SRR3581356_output/quant.sf','SRR3581676_output/quant.sf','SRR3581843_output/quant.sf','SRR3581865_output/quant.sf','SRR3581699_output/quant.sf','SRR3581703_output/quant.sf','SRR3581869_output/quant.sf','SRR3581705_output/quant.sf','SRR3581871_output/quant.sf')
samplelist<-c('seed-1','seed-2','root-1','root-2','leaf-1','leaf-2','flower-1','flower-2','pedicel-1','pedicel-2','internode-1','internode-2')
names(data)<-samplelist
txi<-tximport(data,type='salmon',tx2gene=txTogene)
write.csv(txi,'txi.csv',row.names=TRUE)

这样就把salmon的文件成功导入了R里

接下来使用R包DESeq2做差异分析

DESeq2需要3个矩阵:表达矩阵(countData),分组矩阵(colData),差异比较矩阵(design)

library(DESeq2)
sampleNames<-c('seed-1','seed-2','root-1','root-2','leaf-1','leaf-2','flower-1','flower-2','pedicel-1','pedicel-2','internode-1','internode-2')
sampleGroup<-c('seed','seed','root','root','leaf','leaf','flower','flower','pedicel','pedicle','internode','internode')     ##差异比较矩阵
sampleTable<-data.frame(sampleName=sampleNames,type=sampleGroup)     ##分组矩阵
rownames(sampleTable)<-colnames(txi$counts)
dds<-DESeqDataSetFromTximport(txi,sampleTable,design=~type)       ##差异分析
dds<-DESeq(dds) 
res<-results(dds)        ##从DESeq 分析中提取结果表
resordered<-res[order(res$padj),]        ##以padj对res排序
write.csv(resordered,'res.csv',row.names=TRUE)

这里的res.csv里面包含log2FoldChange和pvalue值,可以用来作图寻找差异基因

pheatmap 画热图

library(pheatmap)
exprSet<-read.csv(file = 'txi.csv',header = T,row.names = 1)  ##使用第一步得到的txi.csv表达矩阵
exprSet<-exprSet[,(13:24)]
head(resOrdered)
DEG<-as.data.frame(resOrdered)
nrDEG=na.omit(DEG)
choose_gene=head(rownames(nrDEG),50) ##选择50个基因画热图
choose_matrix=exprSet[choose_gene,]
choose_matrix=t(scale(t(choose_matrix)))
pheatmap(choose_matrix,filename = 'DEseq_heatmap.png')
heatmap热图

ggplot 画火山图

火山图可以看到上调和下调的差异基因

nrDEG$change = as.factor(ifelse(nrDEG$pvalue< 0.05 & abs(nrDEG$log2FoldChange) > 2,
                                  ifelse(nrDEG$log2FoldChange>2 ,'UP','DOWN'),'NOT'))
this_tile <- paste0('Cutoff for logFC is ',round(2,3),
                      '\nThe number of up gene is ',nrow(nrDEG[nrDEG$change =='UP',]) ,
                      '\nThe number of down gene is ',nrow(nrDEG[nrDEG$change =='DOWN',])
  )
library(ggplot2)
g = ggplot(data=nrDEG, 
             aes(x=log2FoldChange, y=-log10(pvalue), 
                 color=change)) +
    geom_point(alpha=0.4, size=1.75) +
    theme_set(theme_set(theme_bw(base_size=20)))+
    xlab("log2 fold change") + ylab("-log10 p-value") +
    ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
    scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)
print(g)
ggsave(g,filename = 'volcano.png')
火山图

相关文章

网友评论

    本文标题:将salmon生成的数据导入R语言做DESeq2差异分析

    本文链接:https://www.haomeiwen.com/subject/kbcdaltx.html