count转TPM

作者: 一只小脑斧 | 来源:发表于2022-06-07 19:43 被阅读0次

    https://cloud.tencent.com/developer/article/1906294

    ####################定义基因长度为 非冗余exon长度之和

    library("TxDb.Hsapiens.UCSC.hg38.knownGene")

    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

    if(!file.exists('gene_length.Rdata')){

      exon_txdb=exons(txdb)

      genes_txdb=genes(txdb)

      o = findOverlaps(exon_txdb,genes_txdb)

      o

      t1=exon_txdb[queryHits(o)]

      t2=genes_txdb[subjectHits(o)]

      t1=as.data.frame(t1)

      t1$geneid=mcols(t2)[,1]

      g_l = lapply(split(t1,t1$geneid),function(x){

        # x=split(t1,t1$geneid)[[1]]

        head(x)

        tmp=apply(x,1,function(y){

          y[2]:y[3]

        })

        length(unique(unlist(tmp)))

      })

      head(g_l)

      g_l=data.frame(gene_id=names(g_l),length=as.numeric(g_l))

      save(g_l,file = 'gene_length.Rdata')

    }

    #检查表达量矩阵额基因长度信息

    load(file = 'gene_length.Rdata')

    head(g_l)

    library(org.Hs.eg.db)

    g2s=toTable(org.Hs.egSYMBOL)

    genelength <-  merge(g_l,g2s,by='gene_id')

    head(genelength)

    genelength=genelength[,c(3,2)]

    colnames(genelength) <- c("gene","length")

    #排序

    counts <- exp[rownames(exp) %in% genelength$gene,]

    #转换成矩阵

    count<- as.matrix(counts)

    #存贮基因长度信息

    genelength[match(rownames(counts),

                    genelength$gene),"length"]

    # eff_length<- genelength$length

    eff_length <- genelength[match(rownames(counts),genelength$gene),"length"]/1000

    class(count[1,1])

    #count转为数值

    data=apply(count,2,as.numeric)

    row.names(data)<-row.names(count)

    count<-data

    x <- count/eff_length

    mat_tpm <- t( t(x) / colSums(x) ) * 1e6

    rownames(mat_tpm)<- rownames(count)

    mat_tpm[1:4,1:4]

    #mat_tpm<-mat_tpm[-1,]

    write.table(mat_tpm, file="paca.tidy.tpm.txt", sep="\t", col.names=T, quote=F,row.names = T)

    相关文章

      网友评论

        本文标题:count转TPM

        本文链接:https://www.haomeiwen.com/subject/tanemrtx.html