美文网首页R优质资源走进转录组生信技巧
由 reads count 数计算 TPM 和FPKM

由 reads count 数计算 TPM 和FPKM

作者: 欧阳松 | 来源:发表于2021-02-03 22:15 被阅读0次

    reads count数计算TPMFPKM
    下载gff文件
    http://venanciogroup.uenf.br/cgi-bin/gmax_atlas/download_by_pub.cgi去这里下载
    reads count 数据

    library("GenomicFeatures")
    library("dplyr")
    path1 = setwd("C:/Users/95656/Desktop/hyp/TZX/")
    setwd(path1)
    # 计算基因length
    
    gffpath = "Gmax_275_Wm82.a2.v1.gene.gff3"
    print(gffpath)
    txdb <- makeTxDbFromGFF(gffpath,format="gff")
    exons_gene <- exonsBy(txdb, by = "gene")
    exons_gene_lens <- lapply(exons_gene,function(x){sum(width(reduce(x)))})
    length1 = t(as.data.frame(exons_gene_lens))
    write.table(length1,"C:/Users/95656/Desktop/hyp/TZX/length.txt", sep = "\t")
    
    # 合并 reads count 数据和length
    length1 = read.table("C:/Users/95656/Desktop/hyp/TZX/length.txt", header = T,sep="\t")
    countsfile = "C:/Users/95656/Desktop/hyp/TZX/PRJNA238493_BP_raw_count.expression.tsv"
    count1 = read.table(countsfile, header = T, sep = "\t")
    a = inner_join(count1, length1, by="Locus_Name")
    a[is.na(a)] = 0 # 设置 NA值为0
    a = a[, -28]  # 删除第28列  Locus_Name.1
    write.table(a,"C:/Users/95656/Desktop/hyp/TZX/countsdata.txt", sep = "\t", row.names = FALSE)
    
    # 按照对应关系给countdata columns 改名,改好的文件保存为 re_countsdata.txt
    # {'SAMN02644800': 'root', 'SAMN02644811': 'leaf3', 'SAMN02644826': 'seed4', 'SAMN02644825': 'seed3', 'SAMN02644822': 'podseed3', 'SAMN02644821': 'podseed2', 'SAMN02644820': 'podseed1', 'SAMN02644803': 'stem1', 'SAMN02644806': 'leafbud1', 'SAMN02644804': 'stem2', 'SAMN02644813': 'flower2', 'SAMN02644814': 'flower3', 'SAMN02644816': 'flower5', 'SAMN02644815': 'flower4', 'SAMN02644817': 'pod1', 'SAMN02644823': 'seed1', 'SAMN02644818': 'pod2', 'SAMN02644819': 'pod3', 'SAMN02644824': 'seed2', 'SAMN02644809': 'leaf1', 'SAMN02644807': 'leafbud2', 'SAMN02644802': 'cotyledon2', 'SAMN02644810': 'leaf2', 'SAMN02644805': 'shoot meristem', 'SAMN02644812': 'flower1', 'SAMN02644827': 'seed5'}
    
    # 计算 TPM 和 FPKM
    
    setwd("C:/Users/95656/Desktop/hyp/TZX/")
    countfile = "C:/Users/95656/Desktop/hyp/TZX/re_countsdata.txt"
    mycounts<-read.table(countfile, header = T,sep="\t")
    head(mycounts)
    rownames(mycounts)<-mycounts[,1]
    mycounts<-mycounts[,-1]
    head(mycounts)
    # TPM
    kb <- mycounts$length / 1000
    countdata <- mycounts[,1:26]
    rpk <- countdata / kb
    tpm <- t(t(rpk)/colSums(rpk) * 1000000)
    head(tpm)
    write.table(tpm,file="my_TPM.txt",sep="\t",quote=F)
    # FPKM
    fpkm <- t(t(rpk)/colSums(countdata) * 10^6)
    head(fpkm)
    write.table(fpkm,file="my_FPKM.txt",sep="\t",quote=F)
    
    
    ## FPKM转化为TPM
    # fpkm_to_tpm = t(t(fpkm)/colSums(fpkm))*10^6
    # head(fpkm_to_tpm)
    

    计算 共表达相关性

    path1 = "C:\\Users\\95656\\Desktop\\hyp\\比较"
    setwd(path1)
    rowdata <- read.table(choose.files(),header=T,sep="\t")
    # exp1 = rowdata[,5:]
    # "cotyledon1", "leafbud3",
    exp1 = rowdata[c("cotyledon2", "flower1", "flower2", "flower3", "flower4", "flower5", "leaf1", "leaf2", "leaf3", "leafbud1", "leafbud2", "pod1", "pod2", "pod3", "podseed1", "podseed2", "podseed3", "root", "seed1", "seed2", "seed3", "seed4", "seed5", "shootmeristem", "stem1", "stem2")]
    rownames(exp1)<-rowdata$Name
    exp1[is.na(exp1)] = 0 # 设置 NA值为0
    exp1 = as.matrix(exp1)
    geneid <- rowdata[,'Name']
    TCP = geneid[1:5] 
    Gma = geneid[1:5]
    outfile = choose.files()
    for(i in 1:length(TCP))
    {
        for(j in 1:length(Gma))
        {
            x <- as.numeric(exp1[i,])
            y <- as.numeric(exp1[j,])
            if(sum(exp1[i,])*sum(exp1[j,]) != 0)#取出表达值都为0的行,如果要严格一点,想要把表达值出现0的行都去掉的话可以把求和函数sum()换成求积函数prod(x)
            {
                gene_cor <- cor.test(x,y,alternative = "two.sided",method = "pearson")
                gene_p <- gene_cor$p.value
                gene_e <- gene_cor$estimate
                if(abs(gene_e) >= 0 && gene_p <= 1)
                {
                    res<-paste(TCP[i],Gma[j],gene_e,gene_p,sep="\t")
                    write.table(res, outfile,sep="\t",col.names=F,row.names=F,quote=F,append=T)
                }
            }else{
                print("something is error")
            }
        }
    }
    

    相关文章

      网友评论

        本文标题:由 reads count 数计算 TPM 和FPKM

        本文链接:https://www.haomeiwen.com/subject/ojxvtltx.html