代码如下:
#!/usr/bin/env Rscript
#parse parameter
library(argparser,quietly = TRUE)
#Creat a parser
p<- arg_parser("run featureCounts and calculate FPKM/TPM")
#Add command line argumnets
p<-add_argument(p,"--bam",help="input: bam file",type="character")
p<-add_argument(p,"--gtf",help="input: gtf file",type="character")
p<-add_argument(p,"--output",help="out prefix",type="character")
#Parse the command line arguments
argv<-parse_args(p)
library(Rsubread)
library(limma)
library(edgeR)
bamFile<- argv$bam
gtfFile<- argv$gtf
nthreads<- 1
outFilePref<- argv$output
outStatsFilePath<- paste(outFilePref, '.log',sep='');
outCountsFilePath<- paste(outFilePref,'.count',sep='');
fCountsList=featureCounts(bamFile,annot.ext=gtfFile,isGTFAnnotationFile=TRUE,nthreads=nthreads,isPairedEnd=TRUE)
dgeList=DGEList(counts=fCountsList$counts,genes=fCountsList$annotation)
fpkm=rpkm(dgeList,dgeList$genes$Length)
tpm=exp(log(fpkm)-log(sum(fpkm))+log(1e6))
write.table(fCountsList$stat,outStatsFilePath,sep="\t",col.names = FALSE,row.names = FALSE,quote=FALSE)
featureCounts=cbind(fCountsList$annotation[,1],fCountsList$counts,fpkm,tpm)
colnames(featureCounts)=c('gene_id','counts','fpkm','tpm')
write.table(featureCounts,outCountsFilePath,sep="\t",col.names = TRUE,row.names = FALSE,quote = FALSE)
使用方法如下:usage: run-featurecounts.R [--] [--help] [--bam BAM]
[--gtf GTF] [--output OUTPUT]
有时候需要运行:Rscript run-featurecounts.R --bam BAM --gtf GTF --output OUTPUT
结果展示:
gene_id counts fpkm tpm
Os01g0100100 372 5.48313205414791 6.2561821577044
Os01g0100200 0 0 0
Os01g0100300 0 0 0
Os01g0100400 156 3.12294044761257 3.56323431844892
Os01g0100466 0 0 0
Os01g0100500 1365 26.5755626412215 30.322370350572
Os01g0100600 117 2.58240088289187 2.94648572531943
Os01g0100650 0 0 0
Os01g0100700 3281 156.664971431348 178.752689033746
版权属于“基因课”,更多详见基因课http://www.genek.tv/
网友评论