美文网首页
Bulk RNA(普通转录组)多组差异基因分析函数

Bulk RNA(普通转录组)多组差异基因分析函数

作者: KS科研分享与服务 | 来源:发表于2024-03-20 09:00 被阅读0次

    最近好几个小伙伴都在用我们之前写的一个bulk RNA多组差异分析函数(重启之普通R转录组分析(3):写一个通用的Deseq2多组差异分析函数),发现有些小问题,或者说一些功能还不完善,所以我们这次进行了升级和优化。第一,将bulk差异分析的三大包DESeq2、edgeR、limma都纳入进来了。第二,让您的分析更加简单,不用担心复杂的代码和分组设置(很多时候还会搞错谁vs谁)。第三,不论你的两组样本重复数是否相同,都可以进行分析(暂不支持无重复样本的分析,主要是我认为算法再怎么优化,也消除不了个体差异的随机性)。第四,多组样本的分析也能轻松搞定!相信随着目前bulk测序的普及和成本的降低,我们这个函数还是很有用的!

    参考:
    https://www.bilibili.com/video/BV1n2421A7oJ/?spm_id_from=333.999.0.0&vd_source=05b5479545ba945a8f5d7b2e7160ea34

    首先我们看一下函数的参数,也是很简洁:



    然后我们测试一下:首先是两组等重复样本,很简单!

    
    # NOTE: machine-specific working directory from the original post; point it
    # at the folder that holds the example CSV files before running.
    setwd('D:\\KS项目\\公众号文章\\Bulk多组差异基因分析函数')

    # 1. Two-group analysis (equal number of samples) ----

    # Expression table: the first CSV column becomes the row names and each
    # remaining column is one sample. `TRUE` is spelled out because `T` is an
    # ordinary variable that can be reassigned.
    df1 <- read.csv("Two_group.csv", header = TRUE, row.names = 1)
    colnames(df1)
    # [1] "Cancer1" "Cancer2" "Cancer3" "Health1" "Health2" "Health3"

    # Sample sheet: one column per group, listing that group's sample names.
    meta1 <- data.frame(Cancer = c("Cancer1", "Cancer2", "Cancer3"),
                        Health = c("Health1", "Health2", "Health3"))

    # KS_bulkRNA_MultiGroup_DEGs() is not defined in this snippet; it must be
    # loaded beforehand. The three calls below differ only in `methods`.
    # repNum1 / repNum2 presumably give the replicate counts of the test and
    # control groups respectively (3 and 3 here) -- confirm against the
    # function's own documentation.
    deg1_Deseq2 <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df1,
                                              meta = meta1,
                                              methods = "DESeq2",
                                              test = "Cancer",
                                              control = "Health",
                                              repNum1 = 3,
                                              repNum2 = 3)

    deg1_edgeR <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df1,
                                             meta = meta1,
                                             methods = "edgeR",
                                             test = "Cancer",
                                             control = "Health",
                                             repNum1 = 3,
                                             repNum2 = 3)

    deg1_limma <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df1,
                                             meta = meta1,
                                             methods = "limma",
                                             test = "Cancer",
                                             control = "Health",
                                             repNum1 = 3,
                                             repNum2 = 3)
    

    重复数不等的两组样本分析也是如此,只要设置好test和control即可:

    
    # 2. Two-group analysis (different number of samples) ----

    # Expression table: the first CSV column becomes the row names and each
    # remaining column is one sample. `TRUE` is spelled out because `T` is an
    # ordinary variable that can be reassigned.
    df4 <- read.csv("two_diff_group.csv", header = TRUE, row.names = 1)
    colnames(df4)
    # [1] "HC_1"  "HC_2"  "HC_3"  "HC_4"  "PPC_1" "PPC_2" "PPC_3" "PPC_4" "PPC_5" "PPC_6"

    # Sample sheet: one column per group. data.frame() needs columns of equal
    # length, so the shorter group (HC, 4 samples) is padded with "" up to the
    # length of the longer one (PPC, 6 samples).
    meta4 <- data.frame(HC = c("HC_1", "HC_2", "HC_3", "HC_4", "", ""),
                        PPC = c("PPC_1", "PPC_2", "PPC_3", "PPC_4", "PPC_5", "PPC_6"))

    # KS_bulkRNA_MultiGroup_DEGs() is not defined in this snippet; it must be
    # loaded beforehand. The three calls below differ only in `methods`.
    # repNum1 = 6 matches the size of the test group (PPC) and repNum2 = 4 the
    # size of the control group (HC); presumably that is their meaning --
    # confirm against the function's own documentation.
    deg4_Deseq2 <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df4,
                                              meta = meta4,
                                              methods = "DESeq2",
                                              test = "PPC",
                                              control = "HC",
                                              repNum1 = 6,
                                              repNum2 = 4)

    deg4_edgeR <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df4,
                                             meta = meta4,
                                             methods = "edgeR",
                                             test = "PPC",
                                             control = "HC",
                                             repNum1 = 6,
                                             repNum2 = 4)

    deg4_limma <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df4,
                                             meta = meta4,
                                             methods = "limma",
                                             test = "PPC",
                                             control = "HC",
                                             repNum1 = 6,
                                             repNum2 = 4)
    

    我们可以对比一下三种方法得到的差异基因结果,会发现它们并不完全一样,这是因为三者的算法不同,不用纠结。从我们这个数据可以看出,DESeq2和edgeR的结果重合还是挺多的。

    # Venn diagram comparing the significant genes reported by the three methods
    #
    # Keep rows with a raw p-value <= 0.05. The fold-change condition
    # `abs(...) > 0` only drops rows whose log fold change is exactly 0 (or NA),
    # so it is effectively no fold-change cut-off; raise the 0 to apply one.
    # The result tables name their columns differently, hence the three
    # separate column pairs below.
    deg4_Deseq2_sig <- deg4_Deseq2[which(abs(deg4_Deseq2$log2FoldChange) > 0 & deg4_Deseq2$pvalue <= 0.05), ]
    deg4_edgeR_sig <- deg4_edgeR[which(abs(deg4_edgeR$logFC) > 0 & deg4_edgeR$PValue <= 0.05), ]
    deg4_limma_sig <- deg4_limma[which(abs(deg4_limma$logFC) > 0 & deg4_limma$P.Value <= 0.05), ]


    library(ggvenn)
    library(tidyverse)

    # One named set of gene IDs (row names) per method.
    Venn_list <- list(deg4_Deseq2_sig = rownames(deg4_Deseq2_sig),
                      deg4_edgeR_sig = rownames(deg4_edgeR_sig),
                      deg4_limma_sig = rownames(deg4_limma_sig))

    # Convert the list to a data.frame with list_to_data_frame(). The plot below
    # is drawn from Venn_list; data_veen is not used by it.
    data_veen <- list_to_data_frame(Venn_list)

    # `TRUE` / `FALSE` are spelled out because `T` / `F` are ordinary variables
    # that can be reassigned.
    ggvenn(Venn_list,
           show_percentage = TRUE,
           show_elements = FALSE,
           text_size = 3,
           digits = 1,
           set_name_size = 4,
           stroke_color = "grey30",
           fill_color = c("#FF8C00", "#4DAF4A", "#B64E89"),
           set_name_color = c("#FF8C00", "#4DAF4A", "#B64E89"))
    

    多组样本的分析就更简单了:大多数参数都已经预先设置好了。这里需要强调一点:对于多组样本,在设置meta的时候,test组要放在前面,control组放在后面!

    # 4. Multi-group test ----

    # check.names = FALSE keeps the sample names exactly as written in the CSV
    # header (they contain a space, e.g. "Fbrain 1"). `TRUE` / `FALSE` are
    # spelled out because `T` / `F` are ordinary variables that can be
    # reassigned.
    df3 <- read.csv("count_gene.csv", header = TRUE, row.names = 1, check.names = FALSE)
    colnames(df3)
    # [1] "Fbrain 1" "Fbrain 2" "Fbrain 3" "Fbrain 4" "Fhom 1"   "Fhom 2"   "Fhom 3"  
    # [8] "Mbrain 1" "Mbrain 2" "Mbrain 3" "Mbrain 4" "Mbrain 5" "Mhom 1"   "Mhom 2"  
    # [15] "Mhom 3"   "Mhom 4" 

    # Sample sheet: one column per group, padded with "" to the length of the
    # largest group (Mbrain, 5 samples). Per the accompanying article text, the
    # test groups must be listed before the control groups.
    meta3 <- data.frame(Mhom = c("Mhom 1", "Mhom 2", "Mhom 3", "Mhom 4", ""),
                        Fhom = c("Fhom 1", "Fhom 2", "Fhom 3", "", ""),
                        Mbrain = c("Mbrain 1", "Mbrain 2", "Mbrain 3", "Mbrain 4", "Mbrain 5"),
                        Fbrain = c("Fbrain 1", "Fbrain 2", "Fbrain 3", "Fbrain 4", ""))

    # KS_bulkRNA_MultiGroup_DEGs() is not defined in this snippet; it must be
    # loaded beforehand. No test/control/repNum arguments are passed in the
    # multi-group case. separator = " " presumably tells the function how the
    # group name and replicate index are joined in the sample names ("Mhom 1")
    # -- confirm against the function's own documentation.
    deg3_Deseq2 <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df3,
                                              meta = meta3,
                                              methods = "DESeq2",
                                              separator = " ")

    deg3_edgeR <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df3,
                                             meta = meta3,
                                             methods = "edgeR",
                                             separator = " ")

    deg3_limma <- KS_bulkRNA_MultiGroup_DEGs(exprSet = df3,
                                             meta = meta3,
                                             methods = "limma",
                                             separator = " ")
    

    好了,这就是所有的内容了,希望对你有帮助!

    相关文章

      网友评论

          本文标题:Bulk RNA(普通转录组)多组差异基因分析函数

          本文链接:https://www.haomeiwen.com/subject/guswzdtx.html