美文网首页
R相关性分析

R相关性分析

作者: wo_monic | 来源:发表于2021-10-11 14:14 被阅读0次

    两组时间向量相关性分析(皮尔逊分析)

    # Switch to the data directory. The relative paths used by the read and
    # save calls in this script resolve against the working directory.
    # NOTE(review): setwd() ties the script to this directory layout; it is
    # kept because the other snippets depend on the working directory.
    setwd("circos/test")
    # install.packages("ggpubr")  # one-time install if ggpubr is missing
    library("ggpubr")
    # Read the tab-separated table, using its first row as column names.
    # TRUE is spelled out because `T` is an ordinary variable that can be
    # reassigned, whereas TRUE is a reserved word.
    data1 <- read.table("check.table", header = TRUE, sep = "\t")
    # Show the first rows so the parsed columns can be checked by eye.
    head(data1)
    

    检测两个变量是否符合正态分布(看散点是否分布在阴影中,是,则是正态分布)

    # Q-Q plot for the gene coverage column
    ggqqplot(
      data = data1$Gene_coverge,
      ylab = "gene coverge"
    )
    # Q-Q plot for the LTR percentage column
    ggqqplot(
      data = data1$LTR_percent,
      ylab = "LTR"
    )
    

    用Shapiro-Wilk检验检测数据是否符合正态分布:p<0.05,拒绝正态性假设(不符合正态分布);p>0.05,不能拒绝正态性假设(可视为符合正态分布)。

    # Shapiro-Wilk normality test on each column; the author's run gave
    # p = 2.2e-16 for both.
    print(shapiro.test(x = data1$Gene_coverge))
    print(shapiro.test(x = data1$LTR_percent))
    

    经过以上两种方法(Q-Q图和Shapiro-Wilk检验)检测,本数据不符合正态分布,因此不适合用pearson方法检验相关性,改用kendall和spearman方法。

    皮尔逊方法,本实验不适用

    # Pearson correlation, deliberately disabled: the `if (FALSE)` guard means
    # nothing in this branch ever runs. It is kept for reference only.
    if (FALSE) {
      res <- cor.test(
        x = data1$Gene_coverge,
        y = data1$LTR_percent,
        method = "pearson"
      )
      res
      # cor is the Pearson coefficient (-0.9668788 in the author's run;
      # 1 = positive, -1 = negative correlation), df is the degrees of
      # freedom, p-value < 2.2e-16.
      ggscatter(
        data = data1,
        x = "Gene_coverge",
        y = "LTR_percent",
        add = "reg.line",
        conf.int = TRUE,
        cor.coef = TRUE,
        cor.method = "pearson",
        xlab = "gene coverge",
        ylab = "LTR percent"
      )
    }
    

    kendall方法

    # Kendall correlation test between gene coverage and LTR percentage
    res2 <- cor.test(
      x = data1$Gene_coverge,
      y = data1$LTR_percent,
      method = "kendall"
    )
    print(res2)
    # tau is the Kendall coefficient (-0.8825434 in the author's run),
    # p-value < 2.2e-16.
    # Scatter plot annotated with the Kendall coefficient. It is stored in p2
    # and not drawn here.
    p2 <- ggscatter(
      data = data1,
      x = "Gene_coverge",
      y = "LTR_percent",
      add = "reg.line",
      conf.int = TRUE,
      cor.coef = TRUE,
      cor.method = "kendall",
      xlab = "gene coverge",
      ylab = "LTR percent"
    )
    

    spearman方法

    # Spearman correlation test between gene coverage and LTR percentage
    res3 <- cor.test(data1$Gene_coverge, data1$LTR_percent, method = "spearman")
    res3
    # rho is the Spearman coefficient (-0.9799782 in the author's run),
    # p-value < 2.2e-16.
    # Scatter plot annotated with the Spearman coefficient, plus a title.
    p3 <- ggscatter(
      data1,
      x = "Gene_coverge", y = "LTR_percent",
      add = "reg.line", conf.int = TRUE,
      cor.coef = TRUE, cor.method = "spearman",
      xlab = "gene coverge", ylab = "LTR percent"
    ) +
      labs(title = "Correlation analysis of LTR and gene density")

    # cowplot::plot_grid(p2, p3, nrow = 2, labels = c("a", "b"))
    # Pass the plot explicitly. Without `plot =`, ggsave() writes whatever
    # last_plot() returns, which stops being p3 as soon as any other plot is
    # built in between (for example, if the plot_grid line is enabled).
    ggsave("LTR_gene_coverge.check.pdf", plot = p3, dpi = 300)
    ggsave("LTR_gene_coverge.check.tiff", plot = p3)
    
    # Read the second tab-separated table, using its first row as column
    # names. TRUE is spelled out because `T` can be reassigned.
    data0 <- read.table("check3.table", header = TRUE, sep = "\t")
    # Scatter plot of Gypsy against gene coverage, annotated with the Spearman
    # coefficient.
    p_gypsy <- ggscatter(
      data0,
      x = "Gene_coverge", y = "Gypsy",
      add = "reg.line", conf.int = TRUE,
      cor.coef = TRUE, cor.method = "spearman",
      xlab = "gene coverge", ylab = "Gypsy"
    ) +
      labs(title = "Correlation analysis of Gypsy and gene density")
    # Draw the plot, as the original unassigned expression did.
    print(p_gypsy)
    # Save this exact plot instead of relying on the implicit last_plot().
    ggsave("gypsy_gene_coverge.pdf", plot = p_gypsy)
    
    Q-Q图中的散点不在阴影区内,说明数据不符合正态分布。

    对整个数据框各组变量都进行相关性分析

    library(corrplot)
    # Load the table and compute the Kendall correlation matrix over all of
    # its columns.
    K <- read.csv(file = "test.csv")
    cor_data <- cor(x = K, method = "kendall")
    # Draw the same matrix three ways: circles and pies with type = "lower",
    # then numbers with the default type.
    corrplot(corr = cor_data, method = "circle", type = "lower")
    corrplot(corr = cor_data, method = "pie", type = "lower")
    corrplot(corr = cor_data, method = "number")
    

    corrplot的method参数可选:"circle", "square", "ellipse", "number", "shade", "color", "pie";type参数可选:"full", "lower", "upper"。

    相关文章

      网友评论

          本文标题:R相关性分析

          本文链接:https://www.haomeiwen.com/subject/ealloltx.html