(一)热图
library(Rtsne)
library(ggplot2)
library(gplots)
library(RColorBrewer)
library(pheatmap)
library(readr)
options(stringsAsFactors = F)
#读取数据(差异表达基因标准化矩阵,横轴为样本,纵轴为基因)
exprSet<-read.table(file.choose(),header = T,sep="\t")
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)
sampleTraits<-read_csv("2-datTraits_95.csv", col_names = T)
condition<-factor(sampleTraits$Diagnosis,levels = c("ASD","CTL")); condition
colData<-data.frame(row.names = colnames(exprSet[,-1]), condition); colData
rownames(exprSet)=exprSet[,1]
exprSet=exprSet[,-1]
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)
#按颜色划分患者分组信息
color<-factor(colData$condition,
labels = c('#00AFBB', "#E7B800"),
levels = c("ASD","CTL"))
heatmap.2(as.matrix(exprSet),#数据转换
col = greenred(75),
hclust = function(x)
hclust(x, method = 'ward.D2'),
distfun = function(x)
dist(x,method ='euclidean'),
scale = "row", #“none”, “row”, “column”
dendrogram = 'both',
key = TRUE, symkey = FALSE,
density.info = "none", trace = "none", cexRow = 0.5,
Rowv = TRUE,
ColSideColors = as.character(color))
library(gplots)
library(RColorBrewer)
options(stringsAsFactors = F)
#setwd("/Users/Davey/Desktop/VennDiagram/")
#读取数据(差异表达基因标准化矩阵,横轴为样本,纵轴为基因)
exprSet<-read.table(file.choose(),header = T,sep="\t")
#exprSet<-read.csv(file.choose(),header = T,sep=",")
head(exprSet); dim(exprSet)
colnames(exprSet)[1]<-"Symbol"; head(exprSet); dim(exprSet)
#载入注释文件
symbol2id<-read.table(file.choose(),header=T, sep="\t")
names(symbol2id)
#注释,保留有ID的gene
symbol2id_exp<-merge(exprSet,symbol2id,by.x="Symbol",by.y="Symbol",all=F,sort=F)
names(symbol2id_exp); dim(symbol2id_exp)
expdat<-symbol2id_exp[,-c(14,15)]; names(expdat)
#处理数据,第一列作为exprset的行名(基因),列名为样本号
row.names(expdat)<-expdat[,1]
expdat<-expdat[,-1]
head(expdat)
#pheatmap包绘制 https://www.jianshu.com/p/1c55ea64ff3f
#https://wenku.baidu.com/view/b3d959fc1eb91a37f0115c2e.html(说明书)
if(!suppressWarnings(require(pheatmap)))
{
install.packages('pheatmap')
require(pheatmap)
}
library(pheatmap)
#构建列注释信息(病例对照)
annotation_col<-data.frame(factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),levels = c("ASD","Healthy"))) #注意改样本号
colnames(annotation_col)<-'Group'
rownames(annotation_col)<-colnames(expdat)
head(annotation_col)
##构建行注释信息(基因分组):必须与原矩阵项目一致
annotation_row<-data.frame(factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 5))))
colnames(annotation_row)<-'GeneClass'
rownames(annotation_row)<-rownames(expdat)
head(annotation_row)
#自定注释信息的颜色列表
ann_colors = list( #Time = c("white", "firebrick"),
group_12 = c(ASD = "#1B9E77", Healthy = "#D95F02"), #注意改样本号
GeneClass = c(Path1 = "#F5F5F5", Path2 = "#E7298A", Path3 = "#66A61E"))
#绘图
HC<-pheatmap(expdat,
scale = "row",#归一化
clustering_method = "average",#设定不同聚类方法,默认为"complete"('ward', 'ward.D', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid')
clustering_distance_rows = "correlation", #设定行聚类距离方法为Pearson corralation,默认为欧氏距离"euclidean"
color = colorRampPalette(c("navy", "white", "firebrick3"))(50), #改变主体颜色
treeheight_row = 30, #聚类树高度
treeheight_col = 50,
#cellwidth = 15, #格子大小
#cellheight = 12,
annotation_row = annotation_row, #注释行
annotation_col = annotation_col, #注释列
#annotation_legend = FALSE, #无注释图例
annotation_colors = ann_colors, #自定义图例颜色
#display_numbers = TRUE, #显示格子数值
#number_color = "blue", #数值字体颜色
#number_format = "%.1e", #是否科学计数法
#display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test)), #重点标记格子
#cluster_row = FALSE, #不对行聚类
#legend_breaks = c(1:5), #设定图例间距和显示范围
#legend_labels = c("1.0","2.0","3.0","4.0","5.0"),
#legend = FALSE, #无图例
#border_color = "red", #格子边界颜色
border=FALSE, #无边界
#show_rownames=F,#不显示行名列名
#show_colnames=F,
#Colv = if(symm)######
main = "Example heatmap" )#主标题)
HC
#自定义画法=======================================================================================
#患者分组
colData_12<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),levels = c("ASD","Healthy"))
#colData_52<-factor(c(rep("Healthy",37),rep("ASD",13),rep("Healthy",2)),levels = c("ASD","Healthy"))
#样本号一列,分组一列
colData<-data.frame(colnames(expdat), colData_12) #注意改样本号
colnames(colData)<-c('sample', 'group');colData
#按颜色划分患者分组信息
color<-factor(colData$group,
labels = c('yellow','blue'),
levels = c("ASD","Healthy"))
#参数具体含义 https://blog.csdn.net/lalaxumelala/article/details/86022722、 http://blog.sina.com.cn/s/blog_4a0824490102v7aa.html
heatmap.2(as.matrix(expdat),#数据转换
col = greenred(75),
hclust = function(x)
hclust(x, method = 'ward.D2'),
distfun = function(x)
dist(x,method ='euclidean'),
scale = "row",
dendrogram = 'both',
key = TRUE,
symkey = FALSE,
density.info = "none",
trace = "none",
cexRow = 0.5,
ColSideColors = as.character(color))
(二)t-SNE
library(Rtsne)
# 表达矩阵
exprSet<-read.table(file.choose(),header = T,sep="\t")
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)
sampleTraits<-read_csv("2-datTraits_95.csv", col_names = T)
condition<-factor(sampleTraits$Diagnosis,levels = c("ASD","CTL")); condition
colData<-data.frame(row.names = colnames(exprSet[,-1]), condition); colData
rownames(exprSet)=exprSet[,1]
exprSet=exprSet[,-1]
head(exprSet)[c(1:3),c(1:3)]; dim(exprSet)
##tsne
data<-as.matrix(t(exprSet))
tsne_result<-Rtsne(data, dims=2, perplexity = 19, pca = F, theta=0.5)
tsne_plot<-data.frame(tSNE1 = tsne_result$Y[,1],
tSNE2 = tsne_result$Y[,2],
Type = condition)
# 可视化
ggplot(tsne_plot) +
geom_point(aes(x=tSNE1, y=tSNE2, color=condition),size=2) +
theme_bw(base_size = 12, base_family = "") +
theme(legend.justification=c(0,0),
legend.position = c(0.8,0.75),
legend.title = element_blank(),
legend.text = element_text(size = 10)) +
#ggtitle("t-SNE Clustering") +
theme(plot.title = element_text(hjust = 0.5))
dev.off()
#perplexity:浮点型,可选n/5(默认:30)较大的数据集通常需要更大的perplexity。
#考虑选择一个介于5和50之间的值。由于t-SNE对这个参数非常不敏感,所以选择并不是非常重要。
(三)PCA
#install.packages('FactoMineR')
#install.packages('factoextra')
dataTrait<-read.csv(file.choose(),header = T, row.names = 1, sep = ",");names(dataTrait)
group_list<-dataTrait$Diagnosis; summary(diagnosis) #38 ASD 36 CTL
exprSet<-read.csv(file.choose(),header = T,row.names = 1, sep = ",")
dat<-exprSet
dat<-log2(dat+1)
dat[1:4,1:4]
group_list=condition
## 下面是画PCA的必须操作,需要看说明书。
dat=t(dat)#画PCA图时要求是行名时样本名,列名时探针名,因此此时需要转换
dat=as.data.frame(dat)#将matrix转换为data.frame
dat=cbind(dat,group_list) #cbind横向追加,即将分组信息追加到最后一列
#dat<-as.data.frame(dat)
library(FactoMineR)#画主成分分析图需要加载这两个包
library(factoextra)
# The variable group_list (index = 54676) is removed
# before PCA analysis
dat.pca <- PCA(dat[,-ncol(dat)], graph = FALSE)#现在dat最后一列是group_list,需要重新赋值给一个dat.pca,这个矩阵是不含有分组信息的
fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = dat$group_list, # color by groups
palette = c("#7ec0ee", "#ffaeb9"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
(四)火山图
#DESeq2结果画火山图
library(ggplot2)
library(ggthemes)
library(Cairo)
data<-read.table(file.choose(),header = T,sep = "\t")# file=原始diff_result.txt
head(data)
#删除空列
data<-na.omit(data)
#设置图片格式
Cairo(file="volcan_PNG_300_PFC_|FC|1.2.png",
type="png",
units="in",
bg="white",
width=5.5,
height=5,
pointsize=12,
dpi=300)
#设置颜色阈
data$threshold<-as.factor(ifelse(data$pvalue < 0.05 & abs(data$log2FoldChange) > 0.26,
ifelse(data$log2FoldChange > 0.26,"Up","Down"),"Not"))
#导入windows字体
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"))
#画图
plot_DEGs<-ggplot(data = data,
#坐标轴数据对应
aes(x=log2FoldChange, y=-log10(pvalue), color=threshold))+
#颜色
scale_color_manual(values=c("blue","red","grey"))+
#点的形状大小
geom_point(alpha=0.4,size=1.2)+
#标题\坐标轴名称
labs(title= "Volcan plot of PFC set ", x="log2FoldChange", y="-log10pvalue")+
theme_update(plot.title = element_text(hjust = 0.5))+
#坐标轴范围
xlim(-3,3)+
ylim(1,7)+
#主字号,字体
theme_bw(base_size = 12, base_family = "ARL") +
#辅助线
geom_vline(xintercept = c(-log2(1.2),log2(1.2)),lty=4,lwd=0.5,col="grey")+
geom_hline(yintercept = -log10(0.05),lty=4,lwd=0.5,col="grey")+
#去掉原ggplot2背景
theme(panel.grid.major =element_blank(),
panel.grid.minor = element_blank(),
panel.background=element_blank(),
#图列属性
#legend.position = "right",
#legend.title = element_blank(),
#legend.text= element_text(face="bold", color="black",family = "ARL", size=8),
#全图字体
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(face="bold", color="black", size=12),
axis.text.y = element_text(face="bold", color="black", size=12),
axis.title.x = element_text(face="bold", color="black", size=12),
axis.title.y = element_text(face="bold",color="black", size=12))
plot_DEGs+theme_set(theme_bw())
plot_DEGs
# close
dev.off()
网友评论