绘图专题（1）：生物信息

作者: 挽山 | 来源:发表于2020-02-28 14:34 被阅读0次

绘图专题（1）：生物信息
【R语言与生物信息绘图】
2020-01-15 随机森林-原理及如何用R绘图
R数据可视化|图形初阶（上）
哈佛大学单细胞课程|笔记汇总（六）
Android绘图机制与处理技巧
GraphPad Prism 8 for Mac(绘图统计分析)
推荐一款超级牛掰的mac绘图统计分析软件——GraphPad P
生物信息 Linux学习1
生物信息神奇网站系列（二）：Python绘图Gallery

（一）热图

方法一

library(Rtsne)
library(ggplot2)
library(gplots)
library(RColorBrewer)
library(pheatmap)
library(readr) 
options(stringsAsFactors = F)

#读取数据（差异表达基因标准化矩阵，横轴为样本，纵轴为基因）
exprSet<-read.table(file.choose(),header = T,sep="\t") 
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)

sampleTraits<-read_csv("2-datTraits_95.csv", col_names = T) 
condition<-factor(sampleTraits$Diagnosis,levels = c("ASD","CTL")); condition
colData<-data.frame(row.names = colnames(exprSet[,-1]), condition); colData

rownames(exprSet)=exprSet[,1]
exprSet=exprSet[,-1]
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)

#按颜色划分患者分组信息
color<-factor(colData$condition,
              labels = c('#00AFBB', "#E7B800"),
              levels = c("ASD","CTL"))

heatmap.2(as.matrix(exprSet),#数据转换
          col = greenred(75), 
          hclust = function(x) 
            hclust(x, method = 'ward.D2'),
          distfun = function(x) 
            dist(x,method ='euclidean'),
          scale = "row", #“none”, “row”, “column”
          dendrogram = 'both',
          key = TRUE, symkey = FALSE, 
          density.info = "none", trace = "none", cexRow = 0.5,
          Rowv = TRUE,
          ColSideColors = as.character(color))

方法二

library(gplots)
library(RColorBrewer)
options(stringsAsFactors = F)

#setwd("/Users/Davey/Desktop/VennDiagram/")

#读取数据（差异表达基因标准化矩阵，横轴为样本，纵轴为基因）
exprSet<-read.table(file.choose(),header = T,sep="\t") 
#exprSet<-read.csv(file.choose(),header = T,sep=",") 
head(exprSet); dim(exprSet)

colnames(exprSet)[1]<-"Symbol"; head(exprSet); dim(exprSet)

#载入注释文件
symbol2id<-read.table(file.choose(),header=T, sep="\t") 
names(symbol2id)
#注释，保留有ID的gene
symbol2id_exp<-merge(exprSet,symbol2id,by.x="Symbol",by.y="Symbol",all=F,sort=F)
names(symbol2id_exp); dim(symbol2id_exp)
expdat<-symbol2id_exp[,-c(14,15)]; names(expdat)

#处理数据,第一列作为exprset的行名（基因），列名为样本号
row.names(expdat)<-expdat[,1]
expdat<-expdat[,-1]
head(expdat)

#pheatmap包绘制 https://www.jianshu.com/p/1c55ea64ff3f
#https://wenku.baidu.com/view/b3d959fc1eb91a37f0115c2e.html(说明书)
if(!suppressWarnings(require(pheatmap)))
{
  install.packages('pheatmap')
  require(pheatmap)
}
library(pheatmap)

#构建列注释信息（病例对照）
annotation_col<-data.frame(factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),levels = c("ASD","Healthy"))) #注意改样本号
colnames(annotation_col)<-'Group'
rownames(annotation_col)<-colnames(expdat)
head(annotation_col)

##构建行注释信息（基因分组）：必须与原矩阵项目一致
annotation_row<-data.frame(factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 5))))
colnames(annotation_row)<-'GeneClass'
rownames(annotation_row)<-rownames(expdat)
head(annotation_row)

#自定注释信息的颜色列表
ann_colors = list( #Time = c("white", "firebrick"),
                    group_12 = c(ASD = "#1B9E77", Healthy = "#D95F02"), #注意改样本号
                    GeneClass = c(Path1 = "#F5F5F5", Path2 = "#E7298A", Path3 = "#66A61E"))


#绘图
HC<-pheatmap(expdat, 
         scale = "row",#归一化
         clustering_method = "average",#设定不同聚类方法，默认为"complete"（'ward', 'ward.D', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'）
         clustering_distance_rows = "correlation", #设定行聚类距离方法为Pearson corralation，默认为欧氏距离"euclidean"
         color = colorRampPalette(c("navy", "white", "firebrick3"))(50), #改变主体颜色
         treeheight_row = 30, #聚类树高度
         treeheight_col = 50,
         #cellwidth = 15, #格子大小
         #cellheight = 12, 
         annotation_row = annotation_row, #注释行
         annotation_col = annotation_col, #注释列
         #annotation_legend = FALSE, #无注释图例
         annotation_colors = ann_colors, #自定义图例颜色
         #display_numbers = TRUE, #显示格子数值
         #number_color = "blue", #数值字体颜色
         #number_format = "%.1e", #是否科学计数法
         #display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test)), #重点标记格子
         #cluster_row = FALSE, #不对行聚类
         #legend_breaks = c(1:5), #设定图例间距和显示范围
         #legend_labels = c("1.0","2.0","3.0","4.0","5.0"),
         #legend = FALSE, #无图例
         #border_color = "red", #格子边界颜色
         border=FALSE, #无边界
         #show_rownames=F,#不显示行名列名
         #show_colnames=F，
         #Colv = if(symm)######
         main = "Example heatmap" )#主标题)
HC

#自定义画法=======================================================================================
#患者分组
colData_12<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),levels = c("ASD","Healthy"))
#colData_52<-factor(c(rep("Healthy",37),rep("ASD",13),rep("Healthy",2)),levels = c("ASD","Healthy"))
#样本号一列，分组一列
colData<-data.frame(colnames(expdat), colData_12) #注意改样本号
colnames(colData)<-c('sample', 'group');colData

#按颜色划分患者分组信息
color<-factor(colData$group,
              labels = c('yellow','blue'),
              levels = c("ASD","Healthy"))

#参数具体含义 https://blog.csdn.net/lalaxumelala/article/details/86022722、 http://blog.sina.com.cn/s/blog_4a0824490102v7aa.html
heatmap.2(as.matrix(expdat),#数据转换
          col = greenred(75), 
          hclust = function(x) 
            hclust(x, method = 'ward.D2'),
          distfun = function(x) 
            dist(x,method ='euclidean'),
          scale = "row", 
          dendrogram = 'both',
          key = TRUE, 
          symkey = FALSE, 
          density.info = "none", 
          trace = "none", 
          cexRow = 0.5,
          ColSideColors = as.character(color))

内部颜色调整 https://www.jianshu.com/p/ee89ac9f9066

（二）t-SNE

library(Rtsne)
# 表达矩阵
exprSet<-read.table(file.choose(),header = T,sep="\t") 
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)

sampleTraits<-read_csv("2-datTraits_95.csv", col_names = T) 
condition<-factor(sampleTraits$Diagnosis,levels = c("ASD","CTL")); condition
colData<-data.frame(row.names = colnames(exprSet[,-1]), condition); colData

rownames(exprSet)=exprSet[,1]
exprSet=exprSet[,-1]
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)

##tsne
data<-as.matrix(t(exprSet))
tsne_result<-Rtsne(data, dims=2, perplexity = 19, pca = F, theta=0.5)
tsne_plot<-data.frame(tSNE1 = tsne_result$Y[,1], 
                      tSNE2 = tsne_result$Y[,2], 
                      Type = condition)
# 可视化
ggplot(tsne_plot) + 
  geom_point(aes(x=tSNE1, y=tSNE2, color=condition),size=2) +
  theme_bw(base_size = 12, base_family = "") +
  theme(legend.justification=c(0,0),
        legend.position = c(0.8,0.75),
        legend.title = element_blank(),
        legend.text = element_text(size = 10)) + 
  #ggtitle("t-SNE Clustering") + 
  theme(plot.title = element_text(hjust = 0.5))

dev.off()

#perplexity：浮点型，可选n/5（默认：30）较大的数据集通常需要更大的perplexity。
#考虑选择一个介于5和50之间的值。由于t-SNE对这个参数非常不敏感，所以选择并不是非常重要。

（三）PCA

参考：https://www.jianshu.com/p/dd4e842b6707

#install.packages('FactoMineR')
#install.packages('factoextra')

dataTrait<-read.csv(file.choose(),header = T, row.names = 1, sep = ",");names(dataTrait)
group_list<-dataTrait$Diagnosis; summary(diagnosis) #38 ASD  36 CTL

exprSet<-read.csv(file.choose(),header = T,row.names = 1, sep = ",") 
dat<-exprSet
dat<-log2(dat+1)
dat[1:4,1:4]
group_list=condition
## 下面是画PCA的必须操作，需要看说明书。
dat=t(dat)#画PCA图时要求是行名时样本名，列名时探针名，因此此时需要转换
dat=as.data.frame(dat)#将matrix转换为data.frame
dat=cbind(dat,group_list) #cbind横向追加，即将分组信息追加到最后一列
#dat<-as.data.frame(dat)
library(FactoMineR)#画主成分分析图需要加载这两个包
library(factoextra) 
# The variable group_list (index = 54676) is removed
# before PCA analysis
dat.pca <- PCA(dat[,-ncol(dat)], graph = FALSE)#现在dat最后一列是group_list，需要重新赋值给一个dat.pca,这个矩阵是不含有分组信息的
fviz_pca_ind(dat.pca,
             geom.ind = "point", # show points only (nbut not "text")
             col.ind = dat$group_list, # color by groups
             palette = c("#7ec0ee", "#ffaeb9"),
             addEllipses = TRUE, # Concentration ellipses
             legend.title = "Groups"
)

（四）火山图

#DESeq2结果画火山图
library(ggplot2)
library(ggthemes)
library(Cairo)

data<-read.table(file.choose(),header = T,sep = "\t")# file=原始diff_result.txt
head(data)
#删除空列
data<-na.omit(data)

#设置图片格式
Cairo(file="volcan_PNG_300_PFC_|FC|1.2.png", 
      type="png",
      units="in",
      bg="white",
      width=5.5, 
      height=5, 
      pointsize=12, 
      dpi=300)

#设置颜色阈
data$threshold<-as.factor(ifelse(data$pvalue < 0.05 & abs(data$log2FoldChange) > 0.26,
                                 ifelse(data$log2FoldChange > 0.26,"Up","Down"),"Not"))
#导入windows字体
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
             RMN=windowsFont("Times New Roman"),
             ARL=windowsFont("Arial"))
#画图
plot_DEGs<-ggplot(data = data, 
                  #坐标轴数据对应
                  aes(x=log2FoldChange, y=-log10(pvalue), color=threshold))+
                  #颜色
                  scale_color_manual(values=c("blue","red","grey"))+
                  #点的形状大小
                  geom_point(alpha=0.4,size=1.2)+
  
  
  #标题\坐标轴名称
  labs(title= "Volcan plot of PFC set ", x="log2FoldChange", y="-log10pvalue")+
  theme_update(plot.title = element_text(hjust = 0.5))+
  #坐标轴范围     
  xlim(-3,3)+
  ylim(1,7)+
  #主字号，字体
  theme_bw(base_size = 12, base_family = "ARL") +
  #辅助线
  geom_vline(xintercept = c(-log2(1.2),log2(1.2)),lty=4,lwd=0.5,col="grey")+
  geom_hline(yintercept = -log10(0.05),lty=4,lwd=0.5,col="grey")+
  
  #去掉原ggplot2背景
  theme(panel.grid.major =element_blank(),
        panel.grid.minor = element_blank(),
        panel.background=element_blank(),
        #图列属性
        #legend.position = "right",
        #legend.title = element_blank(),
        #legend.text= element_text(face="bold", color="black",family = "ARL", size=8),
        #全图字体
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(face="bold", color="black", size=12),
        axis.text.y = element_text(face="bold",  color="black", size=12),
        axis.title.x = element_text(face="bold", color="black", size=12),
        axis.title.y = element_text(face="bold",color="black", size=12))
            
plot_DEGs+theme_set(theme_bw())
plot_DEGs
# close
dev.off()

绘图专题（1）：生物信息
（一）热图方法一方法二内部颜色调整 https://www.jianshu.com/p/ee89ac9f90...
【R语言与生物信息绘图】
喜欢这个劳斯，来看他的生信绘图课，每一节就几分钟。https://www.bilibili.com/video/B...
2020-01-15 随机森林-原理及如何用R绘图
参考学习资料：菜鸟团生信专题 StatQuest生物统计学专题 - 决策树(1) StatQuest生物统计学专题...
R数据可视化|图形初阶（上）
在生物信息学中常常会用到R语言来绘制图形以实现数据可视化，本文主要通过plot( )高级绘图函数来介绍R语言绘图的...
哈佛大学单细胞课程|笔记汇总（六）
生物信息学习的正确姿势 NGS系列文章包括 NGS基础、在线绘图、转录组分析（ Nature重磅综述|关于R...
Android绘图机制与处理技巧
参考资料目录 1）屏幕尺寸信息 2）2D绘图基础 3）XML绘图 4）绘图技巧 5）图像处理-色彩特效处理 6）...
GraphPad Prism 8 for Mac(绘图统计分析)
GraphPad Prism 8 for Mac是一款集生物统计、曲线拟合和科技绘图于一体生物医学绘图工具，也是医...
推荐一款超级牛掰的mac绘图统计分析软件——GraphPad P
GraphPad Prism 8 for Mac是一款集生物统计、曲线拟合和科技绘图于一体生物医学绘图工具，也是医...
生物信息 Linux学习1
本教程宗旨(请仔细阅读) 本教程不是一个完备的Linux使用教程，而是侧重于一般教程默认读者会了的信息，包括基本概...
生物信息神奇网站系列（二）：Python绘图Gallery
前面我们介绍了R语言绘图的Gallery，受到三名用户的强烈反响，他们情绪激动，难以抑制内心的激动与喜悦。这次我们...