DEseq2 计算FPKM和FC值

作者: 一直想要成为大牛的科研狗 | 来源:发表于2022-10-25 11:05 被阅读0次

    R

    #install.packages("BiocManager")
    #BiocManager::install('DESeq2') 
    #BiocManager::install("GenomicFeatures") 
    
    library("DESeq2")
    ###data in
    countDataraw <- as.matrix(read.csv("gene_count_matrix111.csv", row.names="Geneid"))
    colDataraw <- read.csv("phenodata111.csv")
    rownames(colDataraw) <- colDataraw$sample
    cn <- colDataraw$group
    
    ###rownames =?colnames
    all(rownames(colData) %in% colnames(countDataraw))
    countDataraw <- countDataraw[ , rownames(colData)]
    all(rownames(colData) == colnames(countDataraw))
    
    # dds matrix
    ddsHTSeq <- DESeqDataSetFromMatrix(countData = countDataraw, 
                                       colData = colDataraw, 
                                       design = ~ group)
    ddsHTSeq = estimateSizeFactors(ddsHTSeq) 
    
    ###count
    ExpNor <- counts(ddsHTSeq,normalized = TRUE)
    ###fpkm
    library(GenomicFeatures)
    txdb <- makeTxDbFromGFF("merged.gtf", format = "gtf", circ_seqs = character())
    ebg <- exonsBy(txdb, by="gene")
    exon_gene_sizes <- sum(width(reduce(ebg)))
    mcols(ddsHTSeq)$basepairs <- exon_gene_sizes
    fpkm_out = fpkm(ddsHTSeq)
    write.table(as.data.frame(fpkm_out), file="fpkm_out.txt",sep="\t",quote = FALSE)
    
    ###FC
    ####Analysis 1v1
    ExpNorLoc = {}
    for (i in c(1:10)) {
      for (j in c(1:10) ) {
        if (i < j){
          s3 = i*3
          s2 = i*3 -1
          s1 = i*3 -2
          s6 = j*3
          s5 = j*3 -1
          s4 = j*3 -2           
          countData <- countDataraw[,c(s1,s2,s3,s4,s5,s6)] # 
          colData <- colDataraw[c(s1,s2,s3,s4,s5,s6),]
          kword = paste (sub("NormalizedFPKM_","",cn[s1]),sub("NormalizedFPKM_","",cn[s4]),sep = "_VS_")
          fc <- paste("Log2Foldchange",kword,sep = "_")
          pv <- paste("Pvalue",kword,sep = "_")
          pj <- paste("adjPvalue",kword,sep = "_") ###
          all(rownames(colData) %in% colnames(countData))
          countData <- countData[ , rownames(colData)]
          all(rownames(colData) == colnames(countData))
          ddsHTSeq <- DESeqDataSetFromMatrix(countData = countData, 
                                             colData = colData, 
                                             design = ~ group)
    
          dds <- DESeq(ddsHTSeq) 
          res <- results(dds) 
          ExpNorLoc[[fc]] <- res$log2FoldChange
          ExpNorLoc[[pv]] <- res$pvalue
          ExpNorLoc[[pj]] <- res$padj  ###
        }
      }
    }
    
    
    
    
    
    
    # filter count
    keep <- rowSums(counts(dds) >= 10) >= 3  #过滤低表达基因,至少有3个样品都满足10个以上的reads数  
    dds <- dds[keep, ]
    
    dds <- DESeq(dds)
    res <- results(dds)
    diff = res
    diff <- na.omit(diff)  ## 去除缺失值NA
    dim(diff)
    write.csv(diff,"all_diff.csv")
    
    

    相关文章

      网友评论

        本文标题:DEseq2 计算FPKM和FC值

        本文链接:https://www.haomeiwen.com/subject/dxyazrtx.html