美文网首页
R可视化:标记离群点

R可视化:标记离群点

作者: 生信学习者2 | 来源:发表于2020-12-03 10:20 被阅读0次

在使用boxplot时,我们常常会遇到数据中存在离群点的情况,那么如何在boxplot上标记这些离群点呢?更多知识分享请到 https://zouhua.top/

导入数据和R包

library(dplyr)
library(tibble)
library(ggplot2)
library(ggpubr)

# Group labels and the fill colours used for the plot
grp <- c("Control", "Case")
grp.col <- c("#2D6BB4", "#EE2B2B")

# Read the phenotype table and the two alpha-diversity vectors
# (paths are relative to the current working directory)

phen <- read.csv(file = "phenotype.csv")
evenness <- read.table(file = "evenness_vector.tsv")
shannon <- read.table(file = "shannon_vector.tsv")

处理数据

# Join the phenotype table with both diversity tables on SampleID.
# The diversity tables carry the sample IDs as row names, so these are
# first moved into a SampleID column. Only samples present in all three
# tables survive the inner joins. The two diversity columns are renamed
# to the shorter names "evenness" and "shannon".
dat <- phen %>%
  inner_join(
    rownames_to_column(evenness, var = "SampleID"),
    by = "SampleID"
  ) %>%
  inner_join(
    rownames_to_column(shannon, var = "SampleID"),
    by = "SampleID"
  ) %>%
  dplyr::select(
    SampleID,
    Group,
    evenness = pielou_evenness,
    shannon = shannon_entropy
  )
# Disabled steps kept from the original script for reference:
#   tidyr::gather(key="type", value="value", -c("SampleID", "Group")) %>%
#   mutate(Group=factor(Group, levels = grp))

# Flag outliers with the quartile (Tukey fence) rule.
#
# A value is an outlier when it lies more than `coef` times the
# interquartile range below the first quartile or above the third one.
#
# x     : numeric vector.
# coef  : multiplier applied to the IQR to build the fences (default 1.5).
# na.rm : if TRUE (default), missing values are ignored when computing
#         the quartiles; their own entries in the result are NA.
#
# Returns a logical vector of the same length as `x`.
is_outlier <- function(x, coef = 1.5, na.rm = TRUE) {
  # Compute both quartiles in one call; Q3 - Q1 equals IQR(x).
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, names = FALSE)
  fence <- coef * (quartiles[2] - quartiles[1])
  x < quartiles[1] - fence | x > quartiles[2] + fence
}

# Within each Group, keep the metric value only for outliers (NA otherwise),
# then blank the SampleID of every non-outlier row so that only outliers
# get a text label in the plot. The tibbles are ungrouped afterwards so the
# grouping does not leak into later steps.
dat.evenness <- dat %>%
  group_by(Group) %>%
  mutate(is_outlier = ifelse(is_outlier(evenness), evenness, NA_real_)) %>%
  ungroup()
# Plain NA takes the type of the SampleID column instead of forcing numeric.
dat.evenness$SampleID[is.na(dat.evenness$is_outlier)] <- NA

dat.shannon <- dat %>%
  group_by(Group) %>%
  mutate(is_outlier = ifelse(is_outlier(shannon), shannon, NA_real_)) %>%
  ungroup()
dat.shannon$SampleID[is.na(dat.shannon$is_outlier)] <- NA

# Stack both metrics in long format: one row per sample and metric, with the
# metric name in `type` and its value in `value`.
mdat <- dplyr::bind_rows(
  dat.evenness %>%
    dplyr::select(SampleID, Group, is_outlier, value = evenness) %>%
    mutate(type = "evenness"),
  dat.shannon %>%
    dplyr::select(SampleID, Group, is_outlier, value = shannon) %>%
    mutate(type = "shannon")
)

可视化

# Boxplots of each alpha-diversity metric per group, one facet per metric,
# with outliers labelled by SampleID and a Wilcoxon test between the groups.
ggplot(mdat, aes(x = Group, y = value)) +
  # Whisker end caps drawn as error bars behind the boxes
  stat_boxplot(
    aes(fill = Group),
    geom = "errorbar", width = 0.15,
    position = position_dodge(0.4)
  ) +
  geom_boxplot(
    aes(fill = Group),
    width = 0.4,
    outlier.colour = "black",
    outlier.shape = 21, outlier.size = 1
  ) +
  # SampleID is NA for non-outliers, so only outliers receive a label
  geom_text(aes(label = SampleID), na.rm = TRUE, nudge_y = 0.05) +
  labs(x = "", y = "alpha-diversity") +
  facet_wrap(facets = "type", scales = "free") +
  stat_compare_means(
    comparisons = list(grp),
    method = "wilcox.test"
  ) +
  # Name the colours by group so each colour follows its label in `grp`
  # rather than the alphabetical order of the Group values.
  scale_fill_manual(values = setNames(grp.col, grp)) +
  # "none" hides the fill legend; the logical form (fill = F) is deprecated
  guides(fill = "none") +
  theme_bw() +
  theme(
    axis.ticks.length = unit(0.2, "cm"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 10, face = "bold"),
    strip.text = element_text(color = "red", face = "bold", size = rel(1.5)),
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
    # here; `size` is kept for compatibility with older versions.
    strip.background = element_rect(colour = "black", size = rel(2))
  )

参考

参考文章如引起任何侵权问题,可以与我联系,谢谢。

相关文章

网友评论

      本文标题:R可视化:标记离群点

      本文链接:https://www.haomeiwen.com/subject/ddxmwktx.html