后续分析(post-hoc analysis)
是指在进行实验或收集数据后进行的进一步分析。它是一种探索性的数据分析方法,旨在探究数据间的关系、检验假设或发现新的模式或趋势。
后续分析通常用于以下情况:
- 根据初步分析的结果选择感兴趣的变量或组合进行进一步探索。
- 在主要研究结果或效应不明确时,对数据进行副次分析以了解更多细节。
- 比较多个处理组或条件之间的差异,并确定是否有显著性差异。
- 发现或验证新的关联或模式。
后续分析可以使用各种统计方法,如t检验、方差分析(ANOVA)、卡方检验、回归分析等,具体方法取决于研究问题和数据类型。然而,需要注意的是,由于后续分析的探索性特性,需要小心解释和解读结果,以避免过度解读或错误的推断。
## 导入数据
data <- read.table("matrix.txt",header = TRUE,row.names = 1,sep = "\t")
group <- read.table("group.txt",header = TRUE,sep = "\t")
library(tidyverse)
# 过滤掉平均丰度低于1的物种或基因
data <- data %>% filter(apply(data,1,mean) > 1)
data <- t(data)
# 分组按表达矩阵排序 以便
row.names(group) <- group$sample
group <- group[row.names(data),]
data1 <- data.frame(data,group$group)
colnames(data1) <- c(colnames(data),"Group")
data1$Group <- as.factor(data1$Group)
## t-test
diff <- data1 %>%
select_if(is.numeric) %>%
map_df(~ broom::tidy(t.test(. ~ Group,data = data1)), .id = 'var')
diff$p.value <- p.adjust(diff$p.value,"bonferroni")
## 只显示有差异的物种
# diff <- diff %>% filter(p.value < 0.05)
## Bar chart
## 分组求均值和标准差
## 这里以标准差为例,若求标准误 除以根号n即可
abun.bar <- data1[,c(diff$var,"Group")] %>%
gather(variable,value,-Group) %>%
group_by(variable,Group) %>%
summarise(Mean = mean(value),SD = sd(value))
library(ggplot2)
cbbPalette <- c("#FFCC00", "#56B4E9")
abun.bar$variable <- factor(abun.bar$variable,levels = rev(diff.mean$var))
p1 <- ggplot(abun.bar,aes(variable,Mean,fill = Group)) +
scale_x_discrete(limits = levels(diff$var)) +
coord_flip() +
xlab("") +
ylab("")+
theme(panel.background = element_rect(fill = 'transparent'),
panel.grid = element_blank(),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color='black'),
axis.line = element_line(colour = "black"),
axis.title.x=element_text(colour='black', size=12,face = "bold"),
axis.text=element_text(colour='black',size=10,face = "bold"),
legend.title=element_blank(),
legend.text=element_text(size=12,face = "bold",colour = "black",
margin = margin(r = 20)),
legend.position = c(0.1,1.1),
legend.direction = "horizontal",
legend.key.width = unit(0.8,"cm"),
legend.key.height = unit(0.5,"cm"))
p1
for (i in 1:(nrow(diff) - 1))
p1 <- p1 + annotate('rect', xmin = i+0.5, xmax = i+1.5, ymin = -Inf, ymax = Inf,
fill = ifelse(i %% 2 == 0, 'white', 'gray95'))
p1 <- p1 +
geom_errorbar(aes(ymin=Mean-SD,ymax=Mean+SD),width=0.6,position=position_dodge(0.8))+
geom_col(position=position_dodge(0.8),width = 0.8, colour="black") +
scale_fill_manual(values=cbbPalette)
p1
## 右侧散点图
diff.mean <- diff[,c("var","estimate","conf.low","conf.high","p.value")]
diff.mean$Group <- factor(ifelse(diff.mean$conf.high>0, ifelse(diff.mean$conf.low >0 ,levels(data1$Group)[1],'Nosig'),levels(data1$Group)[2]),levels=c('bulk','Nosig','rhizosphere'))
diff.mean <- diff.mean[order(diff.mean$estimate,decreasing = TRUE),]
diff.mean$p.value <- signif(diff.mean$p.value,3)
diff.mean$p.value <- as.character(diff.mean$p.value)
p2 <- ggplot(diff.mean,aes(var,estimate,fill = Group)) +
theme(panel.background = element_rect(fill = 'transparent'),
panel.grid = element_blank(),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color='black'),
axis.line = element_line(colour = "black"),
axis.title.x=element_text(colour='black', size=12,face = "bold"),
axis.text=element_text(colour='black',size=10,face = "bold"),
axis.text.y = element_blank(),
legend.position = "none",
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 15,face = "bold",colour = "black",hjust = 0.5)) +
scale_x_discrete(limits = levels(diff.mean$var)) +
coord_flip(ylim=c()) +
xlab("") +
ylab("Mean effect size (%)") +
labs(title="95% confidence intervals")
for (i in 1:(nrow(diff.mean) - 1))
p2 <- p2 + annotate('rect', xmin = i+0.5, xmax = i+1.5, ymin = -Inf, ymax = Inf,
fill = ifelse(i %% 2 == 0, 'white', 'gray95'))
p2 <- p2 +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
position = position_dodge(0.8), width = 0.5, size = 0.5) +
geom_point(shape = 21,size = 4.5) +
scale_fill_manual(values= c("#FFCC00", "white","#56B4E9")) +
geom_hline(aes(yintercept = 0), linetype = 'dashed', color = 'black')
p2
p3 <- ggplot(diff.mean,aes(var,estimate,fill = Group)) +
geom_text(aes(y = 0,x = var),label = diff.mean$p.value,
hjust = 0,fontface = "bold",inherit.aes = FALSE,size = 3) +
geom_text(aes(x = nrow(diff.mean)/2 +0.5,y = 0.6),label = "P-value (corrected)",
srt = 90,fontface = "bold",size = 5) +
coord_flip() +
ylim(c(0,1)) +
theme(panel.background = element_blank(),
panel.grid = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank())
p3
## 图像拼接
library(patchwork)
p <- p1 + p2 + p3 + plot_layout(widths = c(4,6,2))
p
## 保存图像
ggsave("post-hoc.pdf",p,width = 10,height = 4)
group
matrix
代码及示例数据:
链接:https://pan.baidu.com/s/1IrDqkQjBuS1KcsfWKYPc1Q?pwd=azac
提取码:azac
网友评论