美文网首页
[R数据]合并sample,计算sd,平均值,变异系数

[R数据]合并sample,计算sd,平均值,变异系数

作者: 花生学生信 | 来源:发表于2024-09-11 09:58 被阅读0次

# Read the two tab-separated inputs: the per-sample table and the
# sample-to-group table. read.delim() already defaults to header = TRUE
# and sep = "\t", so only stringsAsFactors needs to be spelled out.
merged_data <- read.delim(
  file = "merged_with_filenames.tsv",
  stringsAsFactors = FALSE
)
head(merged_data)

group <- read.delim(file = "group.tsv", stringsAsFactors = FALSE)
head(group)

# Attach the group annotation to each row by joining on the shared "sample"
# column; merge() keeps only the samples that occur in both tables.
merged_result <- merge(x = merged_data, y = group, by = "sample")
head(merged_result)
  
# Install dplyr only when it is missing, then attach it.
# requireNamespace() checks availability without attaching the package, and
# library() stops with an error if the package still cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)



# Per (k5, gene) summary of `num`: mean, standard deviation and coefficient
# of variation (CV = sd / mean), each rounded to two decimals for reporting.
result <- merged_result %>%
  group_by(k5, gene) %>%
  summarize(
    mean_raw = mean(num),
    sd_raw = sd(num),
    # Return an ungrouped table so later steps are not silently grouped by k5.
    .groups = "drop"
  ) %>%
  # Derive CV from the unrounded statistics; dividing the rounded values
  # loses precision and breaks when a small mean rounds to 0.00.
  # sd() is NA for a group with a single observation, so CV is NA there too.
  transmute(
    k5,
    gene,
    avg_count = round(mean_raw, 2),
    sd_count = round(sd_raw, 2),
    CV = round(sd_raw / mean_raw, 2)
  )

print(result)
write.csv(result, file = "results.csv", row.names = FALSE)
merged_with_filenames.tsv 每个基因的结果

绘图代码

library(ggplot2)

# Box plot of CV for each k5 group, filled with one colour per group.
# The colour vector must supply at least as many entries as there are k5
# categories; adjust the number of colours and their values to match the data.
ggplot(data = result, mapping = aes(x = k5, y = CV, fill = k5)) +
  geom_boxplot() +
  scale_fill_manual(values = c("red", "blue", "green", "orange", "pink")) +
  xlab("k5") +
  ylab("CV") +
  theme_minimal()


# stat_compare_means() is provided by ggpubr, not ggplot2; without attaching
# it the plot below fails with "could not find function".
library(ggpubr)

# Keep only the rows whose k5 group is not "Admix".
filtered_data <- result %>% filter(k5 != "Admix")

# Pairs of groups to compare against each other.
comparison <- list(c("Xian", "Geng"))

# Box plot of CV per group with a significance annotation for each pair.
# NOTE(review): only "Xian" and "Geng" have a fill colour; any other k5 level
# left after the filter would fall back to the scale's NA colour - confirm
# the data contains only these groups.
p <- ggplot(filtered_data, aes(x = k5, y = CV, fill = k5)) +
  geom_boxplot() +
  labs(x = "k5", y = "CV") +
  theme_minimal() +
  scale_fill_manual(values = c("Xian" = "red", "Geng" = "blue")) +
  stat_compare_means(comparisons = comparison)

print(p)

相关文章

网友评论

      本文标题:[R数据]合并sample,计算sd,平均值,变异系数

      本文链接:https://www.haomeiwen.com/subject/vetpljtx.html