美文网首页R学习与可视化画图,可视化
Day7-R语言-basic visualization for

Day7-R语言-basic visualization for

作者: 天涯清水 | 来源:发表于2020-01-06 22:34 被阅读0次
    R数据分析和画图

    参考原文:生信技能树论坛basic visualization for expression matrix
    结合Jimmy大神的B站视频(P24)生信人应该这样学R语言

    安装并加载必须的packages

    如果你还没有安装,就运行下面的代码安装:

    Bio('CLL')BiocManager::install("KEGG.db",
    install.packages('corrplot')
    install.packages('gpairs')
    install.packages('vioplot')
    

    如果你安装好了,就直接加载它们即可

    library(CLL)
    library(ggplot2)
    library(reshape2)
    library(gpairs)
    library(corrplot)
    

    加载内置的测试数据:

    data(sCLLex)
    sCLLex=sCLLex[,1:8] ## 样本太多,我就取前面8个
    group_list=sCLLex$Disease
    exprSet=exprs(sCLLex)
    
    head(exprSet)
    
    ##           CLL11.CEL CLL12.CEL CLL13.CEL CLL14.CEL CLL15.CEL CLL16.CEL
    ## 1000_at    5.743132  6.219412  5.523328  5.340477  5.229904  4.920686
    ## 1001_at    2.285143  2.291229  2.287986  2.295313  2.662170  2.278040
    ## 1002_f_at  3.309294  3.318466  3.354423  3.327130  3.365113  3.568353
    ## 1003_s_at  1.085264  1.117288  1.084010  1.103217  1.074243  1.073097
    ## 1004_at    7.544884  7.671801  7.474025  7.152482  6.902932  7.368660
    ## 1005_at    5.083793  7.610593  7.631311  6.518594  5.059087  4.855161
    ##           CLL17.CEL CLL18.CEL
    ## 1000_at    5.325348  4.826131
    ## 1001_at    2.350796  2.325163
    ## 1002_f_at  3.502440  3.394410
    ## 1003_s_at  1.091264  1.076470
    ## 1004_at    6.456285  6.824862
    ## 1005_at    5.176975  4.874563
    
    group_list
    
    ## [1] progres. stable   progres. progres. progres. progres. stable   stable  
    ## Levels: progres. stable
    

    接下来进行一系列绘图操作

    主要用到ggplot2这个包,需要把我们的宽矩阵用reshape2包变成长矩阵

    library(reshape2)
    exprSet_L=melt(exprSet)
    colnames(exprSet_L)=c('probe','sample','value')
    exprSet_L$group=rep(group_list,each=nrow(exprSet))
    head(exprSet_L)
    
    ##       probe    sample    value    group
    ## 1   1000_at CLL11.CEL 5.743132 progres.
    ## 2   1001_at CLL11.CEL 2.285143 progres.
    ## 3 1002_f_at CLL11.CEL 3.309294 progres.
    ## 4 1003_s_at CLL11.CEL 1.085264 progres.
    ## 5   1004_at CLL11.CEL 7.544884 progres.
    ## 6   1005_at CLL11.CEL 5.083793 progres.
    

    boxplot

    p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()
    print(p)
    
    image.png

    vioplot

    #library(vioplot)
    #?vioplot
    #vioplot(exprSet)
    #do.call(vioplot,c(unname(exprSet),col='red',drawRect=FALSE,names=list(names(exprSet))))
    p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_violin()
    print(p)
    
    image.png

    boxplot

    p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()
    print(p)
    
    image.png

    histogram

    p=ggplot(exprSet_L,aes(value,fill=group))+geom_histogram(bins = 200)+facet_wrap(~sample, nrow = 4)
    print(p)
    
    image.png

    boxplot

    p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()
    print(p)
    
    image.png

    density

    p=ggplot(exprSet_L,aes(value,col=group))+geom_density()+facet_wrap(~sample, nrow = 4)
    print(p)
    

    [图片上传中...(image-1167c7-1560694447736-9)]

    p=ggplot(exprSet_L,aes(value,col=group))+geom_density() 
    print(p)
    

    [图片上传中...(image-48616c-1560694447736-8)]

    gpairs

    library(gpairs)
    gpairs(exprSet
           #,upper.pars = list(scatter = 'stats') 
           #,lower.pars = list(scatter = 'corrgram')
           )
    
    image.png

    cluster

    out.dist=dist(t(exprSet),method='euclidean')
    out.hclust=hclust(out.dist,method='complete')
    plot(out.hclust)
    
    image.png

    PCA

    pc <- prcomp(t(exprSet),scale=TRUE)
    pcx=data.frame(pc$x)
    pcr=cbind(samples=rownames(pcx),group_list, pcx) 
    p=ggplot(pcr, aes(PC1, PC2))+geom_point(size=5, aes(color=group_list)) +
      geom_text(aes(label=samples),hjust=-0.1, vjust=-0.3)
    print(p)
    
    image.png

    heatmap

    choose_gene=names(sort(apply(exprSet, 1, mad),decreasing = T)[1:50])
    choose_matrix=exprSet[choose_gene,]
    choose_matrix=scale(choose_matrix)
    heatmap(choose_matrix)
    
    image.png
    library(gplots)
    
    ## 
    ## Attaching package: 'gplots'
    
    ## The following object is masked from 'package:stats':
    ## 
    ##     lowess
    
    heatmap.2(choose_matrix)
    

    [图片上传中...(image-376031-1560694447735-3)]

    library(pheatmap)
    pheatmap(choose_matrix)
    

    [图片上传中...(image-9b4742-1560694447735-2)]

    DEG && volcano plot

    library(limma)
    
    ## 
    ## Attaching package: 'limma'
    
    ## The following object is masked from 'package:BiocGenerics':
    ## 
    ##     plotMA
    
    design=model.matrix(~factor(group_list))
    fit=lmFit(exprSet,design)
    fit=eBayes(fit)
    DEG=topTable(fit,coef=2,n=Inf)
    with(DEG, plot(logFC, -log10(P.Value), pch=20, main="Volcano plot"))      
    
    image.png
    logFC_cutoff <- with(DEG,mean(abs( logFC)) + 2*sd(abs( logFC)) )
    DEG$change = as.factor(ifelse(DEG$P.Value < 0.05 & abs(DEG$logFC) > logFC_cutoff,
                                  ifelse(DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')
                           )
    this_tile <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3),
                        '\nThe number of up gene is ',nrow(DEG[DEG$change =='UP',]) ,
                        '\nThe number of down gene is ',nrow(DEG[DEG$change =='DOWN',])
    )
    
    g = ggplot(data=DEG, aes(x=logFC, y=-log10(P.Value), color=change)) +
      geom_point(alpha=0.4, size=1.75) +
      theme_set(theme_set(theme_bw(base_size=20)))+
      xlab("log2 fold change") + ylab("-log10 p-value") +
      ggtitle( this_tile  ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
      scale_colour_manual(values = c('blue','black','red'))  ## corresponding to the levels(res$change)
    print(g)
    
    image.png

    ggplot画图是可以切换主题的

    其实绘图有非常多的细节可以调整,还是略微有点繁琐的!

    p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()
    print(p)
    
    image.png
    p=p+stat_summary(fun.y="mean",geom="point",shape=23,size=3,fill="red")
    p=p+theme_set(theme_set(theme_bw(base_size=20)))
    p=p+theme(text=element_text(face='bold'),axis.text.x=element_text(angle=30,hjust=1),axis.title=element_blank())
    print(p)
    
    image.png

    Boxplot outlier: 箱线图极端值

    Boxplot Graph: 箱线图

    Boxplot t: 盒式图

    相关文章

      网友评论

        本文标题:Day7-R语言-basic visualization for

        本文链接:https://www.haomeiwen.com/subject/tjcofctx.html