1. 数据准备
# 加载所需的R包
library(reshape2)
library(dplyr)
library(ggpubr)
library(FSA)
# 读取文件
data_sample <- read.delim("/data/shumin/test/predictions_ko.L3.xls") # 样本信息
data_sample[1:3, 1:8]
## Pathway_level1 Pathway_level2 Pathway_level3 Level3_description N11 N12 N14 N15
## 1 Metabolism Carbohydrate metabolism ko00010 Glycolysis / Gluconeogenesis 0.01438271 0.015587858 0.014518278 0.015202541
## 2 Metabolism Carbohydrate metabolism ko00020 Citrate cycle (TCA cycle) 0.00394630 0.003274079 0.003909439 0.003349509
## 3 Metabolism Carbohydrate metabolism ko00030 Pentose phosphate pathway 0.01052230 0.010446724 0.010254249 0.010372544
data_group <- read.delim("/data/shumin/test/group.txt") # 分组信息
head(data_group)
## Sample Group
## 1 N11 Normal
## 2 N12 Normal
## 3 N14 Normal
## 4 N15 Normal
## 5 N17 Normal
## 6 N18 Normal
Data <- as.data.frame(t(data_sample[,c(4:23)])) # 提取需要的数据,4-23列(样本)
colnames(Data) <- Data[1,] # 将第一行变为列名
Data_2 <- Data[-1, ] %>% mutate(Sample = row.names(.)) %>% merge(., data_group, by = "Sample") # 去除第一行并和分组信息合并
str(Data_2)
## 'data.frame': 19 obs. of 264 variables:
## $ Sample : chr "N11" "N12" "N14" "N15" ...
## $ Glycolysis / Gluconeogenesis : chr "1.438271e-02" "1.558786e-02" "1.451828e-02" "1.520254e-02" ...
## $ Citrate cycle (TCA cycle) : chr "3.946300e-03" "3.274079e-03" "3.909439e-03" "3.349509e-03" ...
## $ Pentose phosphate pathway : chr "1.052230e-02" "1.044672e-02" "1.025425e-02" "1.037254e-02" ...
## $ Pentose and glucuronate interconversions : chr "5.962178e-03" "5.686224e-03" "5.268724e-03" "5.778111e-03" ...
## $ Fructose and mannose metabolism : chr "1.653927e-02" "1.731108e-02" "1.595058e-02" "1.714072e-02" ...
## ......
Data_2[2:263] <- lapply(Data_2[2:263], as.numeric) # 将character数据格式改为numeric
str(Data_2)
## 'data.frame': 19 obs. of 264 variables:
## $ Sample : chr "N11" "N12" "N14" "N15" ...
## $ Glycolysis / Gluconeogenesis : num 0.0144 0.0156 0.0145 0.0152 0.0146 ...
## $ Citrate cycle (TCA cycle) : num 0.00395 0.00327 0.00391 0.00335 0.00381 ...
## $ Pentose phosphate pathway : num 0.0105 0.0104 0.0103 0.0104 0.0105 ...
## $ Pentose and glucuronate interconversions : num 0.00596 0.00569 0.00527 0.00578 0.00606 ...
## $ Fructose and mannose metabolism : num 0.0165 0.0173 0.016 0.0171 0.0169 ...
## ......
2. 图形绘制
# 设置错误处理机制
options(error = recover)
for (i in 2:(length(colnames(Data_2))-1)) { # 最后一列为分组信息
tryCatch({
test <- Data_2[,c(1, i)]
test$Group<- factor(Data_2$Group, levels = c("Normal","Mild","Severe")) # 修改因子水平,方便后续绘图时按照指定的因子水平顺序进行绘制
dunn_test <- dunnTest(test[,2] ~ Group, method = "bonferroni", data = test) # Dunn's test(邓恩检验),多重比较分析,用于比较多个群组之间的差异
# 添加文本标注
label_data <- data.frame(
x = c(1.5, 2.5, 2), # 添加文本的x轴位置
y = rep(c(max(test[,2]) + 0.001, max(test[,2]) + 0.0015, max(test[,2]) + 0.002)), # 添加文本的y轴位置
label = sprintf("p = %.4f", dunn_test[["res"]]$P.adj) # 文本保留4位小数
)
# 添加线段
line_data <- data.frame(
x = c(1, 2, 1), # 线段x轴的起始位置
y = rep(c(max(test[,2]) + 0.0007, max(test[,2]) + 0.0012, max(test[,2]) + 0.0017)), # 线段y轴的起始位置
xend = c(2, 3, 3), # 线段x轴的终止位置
yend = rep(c(max(test[,2]) + 0.0007, max(test[,2]) + 0.0012, max(test[,2]) + 0.0017)) # 线段y轴的终止位置
)
png(filename = paste0(i, "_2.png"), width = 4000, height = 3000, res = 300)
print(ggboxplot(test, x = "Group", y = test[2], color = colnames(test)[3], add = "jitter") +
scale_color_manual(values = c("#3D6590", "#F7D46E", "#DC615F")) + # 自定义颜色(和我的保持一致了)
stat_compare_means() + # 添加全局p值,默认kruskal.test(非参数检验),也可改anova(参数检验)
theme_bw() +
ylab("Percentage") +
ggtitle(colnames(test)[2]) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.85, hjust = 0.75),
plot.title = element_text(hjust = 0.5)) +
geom_text(data = label_data, aes(x = x, y = y, label = label), size = 4, inherit.aes = FALSE) +
geom_segment(data = line_data, aes(x = x, y = y, xend = xend, yend = yend), size = 0.7, color = "black")
)
dev.off()
}, error = function(e){
# 打印错误消息并跳过继续后续
cat("Error in boxplot for Group", i, ": ", conditionMessage(e), "\n")
})
}
# 服务器终端查看
$ ls
100_2.png 127_2.png 154_2.png 182_2.png 209_2.png 235_2.png 26_2.png 5_2.png 80_2.png
101_2.png 128_2.png 155_2.png 18_2.png 210_2.png 236_2.png 263_2.png 53_2.png 81_2.png
102_2.png 129_2.png 156_2.png 183_2.png 211_2.png 237_2.png 27_2.png 54_2.png 82_2.png
10_2.png 130_2.png 157_2.png 184_2.png 212_2.png 238_2.png 28_2.png 55_2.png 8_2.png
103_2.png 131_2.png 158_2.png 185_2.png 21_2.png 239_2.png 29_2.png 56_2.png 83_2.png
......
ggplot2-1
网友评论