#For Normalize test
#Author:Robin 20200619
library(GenomicFeatures)
#gtf len calculate
txdb=makeTxDbFromGFF("/share/nas1/Data/DataBase/Genome/Homo_sapiens/Transcriptome/GRCh38_hg38/GRCh38.p13/genes/genes.gtf") #read gtf file
exon_txdb=exons(txdb)
genes_txdb=genes(txdb)
o = findOverlaps(exon_txdb,genes_txdb)
t1=exon_txdb[queryHits(o)]
t2=genes_txdb[subjectHits(o)]
t1=as.data.frame(t1)
t1$geneid=mcols(t2)[,1]
g_l = lapply(split(t1,t1$geneid),function(x){
tmp=apply(x,1,function(y){
y[2]:y[3]
})
length(unique(unlist(tmp)))
})
g_l=data.frame(gene_id=names(g_l),length=as.numeric(g_l))
write.csv(g_l,"/share/nas1/Data/Users/luohb/Pipline/Normalize/data/bf_gene_length.csv",row.names = F,quote=F)
########################## 01.FPKM #############################
library(scater)
?calculateFPKM
mtx2<-mtx
mtx2$Length<-NULL
fpkm_check<-calculateFPKM(mtx2, mtx$Length)
"""
FPKM = total exon Fragments / (mapped reads(Millions) * exon length(KB))
"""
len=mtx$Length
FPKM = t(t(mtx2*10^9) / colSums(mtx2)) /len
View(FPKM)
View(fpkm_check)
######################## 02.TPM ##################
"""
TPMi=(Ni/Li)*1000000/sum(Ni/Li+……..+ Nm/Lm)
= Ni/li *10^6/sum(Ni/li)
"""
len=mtx$Length/10^3 #转录本长度
#先计算分子部分
rpk = mtx2/len
#在计算整体
TPM = t( t(rpk)*10^6 / colSums(rpk) )
#check
tpm_check <- calculateTPM(mtx2, len)
View(TPM)
View(tpm_check)
View(rpk)
######################### 03.CPM ################
"""
CPM = Total exon read counts/ Mapped reads(Millions)
= gene A counts*10^6 / mapped reads
"""
CPM = t( t(mtx2) * 10^6 / colSums(mtx2))
cpm_check = calculateCPM(mtx2)
View(cpm_check)
View(CPM)
#### result save
saveRDS(FPKM, '/share/nas1/Data/Users/luohb/Pipline/Normalize/result/FPKM.rds')
saveRDS(TPM, '/share/nas1/Data/Users/luohb/Pipline/Normalize/result/TPM.rds')
saveRDS(CPM, '/share/nas1/Data/Users/luohb/Pipline/Normalize/result/CPM.rds')
网友评论