count转TPM

作者: 一只小脑斧 | 来源:发表于2022-06-07 19:43 被阅读0次

https://cloud.tencent.com/developer/article/1906294

####################定义基因长度为 非冗余exon长度之和

library("TxDb.Hsapiens.UCSC.hg38.knownGene")

txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

if(!file.exists('gene_length.Rdata')){

  exon_txdb=exons(txdb)

  genes_txdb=genes(txdb)

  o = findOverlaps(exon_txdb,genes_txdb)

  o

  t1=exon_txdb[queryHits(o)]

  t2=genes_txdb[subjectHits(o)]

  t1=as.data.frame(t1)

  t1$geneid=mcols(t2)[,1]

  g_l = lapply(split(t1,t1$geneid),function(x){

    # x=split(t1,t1$geneid)[[1]]

    head(x)

    tmp=apply(x,1,function(y){

      y[2]:y[3]

    })

    length(unique(unlist(tmp)))

  })

  head(g_l)

  g_l=data.frame(gene_id=names(g_l),length=as.numeric(g_l))

  save(g_l,file = 'gene_length.Rdata')

}

#检查表达量矩阵额基因长度信息

load(file = 'gene_length.Rdata')

head(g_l)

library(org.Hs.eg.db)

g2s=toTable(org.Hs.egSYMBOL)

genelength <-  merge(g_l,g2s,by='gene_id')

head(genelength)

genelength=genelength[,c(3,2)]

colnames(genelength) <- c("gene","length")

#排序

counts <- exp[rownames(exp) %in% genelength$gene,]

#转换成矩阵

count<- as.matrix(counts)

#存贮基因长度信息

genelength[match(rownames(counts),

                genelength$gene),"length"]

# eff_length<- genelength$length

eff_length <- genelength[match(rownames(counts),genelength$gene),"length"]/1000

class(count[1,1])

#count转为数值

data=apply(count,2,as.numeric)

row.names(data)<-row.names(count)

count<-data

x <- count/eff_length

mat_tpm <- t( t(x) / colSums(x) ) * 1e6

rownames(mat_tpm)<- rownames(count)

mat_tpm[1:4,1:4]

#mat_tpm<-mat_tpm[-1,]

write.table(mat_tpm, file="paca.tidy.tpm.txt", sep="\t", col.names=T, quote=F,row.names = T)

相关文章

网友评论

    本文标题:count转TPM

    本文链接:https://www.haomeiwen.com/subject/tanemrtx.html