最近处理一批小鼠衰老数据,如何展示不同时间点的基因表达数据比较好玩,因此记录一下。更多知识分享请到 https://zouhua.top/。
加载R包
library(dplyr)
library(tibble)
library(ggplot2)
library(data.table)
library(convert)
library(ggpubr)
age <- c("1", "3", "6", "9", "12", "15", "18", "21", "24", "27")
age.col <- c("#283891", "#EED3AC", "#C1272D", "#9DCEDC",
"#ED1C24", "#9DCEDC", "#F89C31", "#B6B6BA", "#6DC06A", "#2D6BB4")
导入数据
phen <- read.csv("phenotype.csv")
prof <- fread("TPM.tsv") %>%
column_to_rownames("V1")
mouse2human <- read.table("gene_mouse2human.tsv",
sep = "\t", header = T)
genelist <- read.table("gene_list.tsv", header = T)
#mouse2human %>% filter(HGNC_symbol%in%genelist$gene2)
处理数据
get_expr_Set <- function(x, y, gene_list=gene_list,
ncount=10, occurrence=0.2){
# x <- phen
# y <- prof
# ncount=10
# occurrence=0.2
prf <- y[rowSums(y) > ncount, ]
prf <- prf[, colSums(prf) > 0]
sid <- intersect(x$SampleID, colnames(y))
phe <- x %>% filter(SampleID%in%sid) %>%
column_to_rownames("SampleID_v2")
prf.cln <- prf %>% dplyr::select(as.character(phe$SampleID)) %>%
rownames_to_column("tmp") %>%
filter(apply(dplyr::select(., -one_of("tmp")), 1, function(x) {
sum(x != 0)/length(x)}) > occurrence) %>%
column_to_rownames("tmp")
# determine the right order between profile and phenotype
for(i in 1:ncol(prf.cln)){
if (!(colnames(prf.cln)[i] == phe$SampleID[i])) {
stop(paste0(i, " Wrong"))
}
}
# change ensemble id into HGNC symbol
mmu_hsa_gene <- inner_join(
prf.cln %>% rownames_to_column("geneid"),
mouse2human %>% dplyr::select(ensembl_id_mouse, HGNC_symbol),
by = c("geneid" = "ensembl_id_mouse")) %>%
dplyr::select(-geneid)
mmu_hsa_gene$median <- apply(mmu_hsa_gene[,-ncol(mmu_hsa_gene)], 1, median)
mmu_hsa_gene <- with(mmu_hsa_gene,
mmu_hsa_gene[order(HGNC_symbol, median, decreasing = T), ])
mmu_hsa_gene.new <- mmu_hsa_gene[!duplicated(mmu_hsa_gene$HGNC_symbol), ] %>%
dplyr::select(-median)
rownames(mmu_hsa_gene.new) <- NULL
mmu_hsa_gene.new <- mmu_hsa_gene.new %>% column_to_rownames("HGNC_symbol")
mmu_hsa_gene.new.cln <- mmu_hsa_gene.new %>% rownames_to_column("gene") %>%
filter(gene%in%gene_list$gene2) %>%
column_to_rownames("gene")
exprs <- as.matrix(mmu_hsa_gene.new.cln)
metadata <- new("AnnotatedDataFrame", data=phe)
experimentData <- new("MIAME",
name="Mouse aging", lab="Lab",
contact="hua zou@outlook.com",
title="Mouse Aging Experiment",
abstract="The gene ExpressionSet",
url="www.zouhua.top",
other=list(notes="Created from text files"))
expressionSet <- new("ExpressionSet", exprs=exprs,
phenoData=metadata,
experimentData=experimentData)
return(expressionSet)
}
gene_expr_set <- get_expr_Set(phen, prof, gene_list=genelist)
获取画图数据
dat_expr <- exprs(gene_expr_set) %>% data.frame()
dat_expr.cln <- dat_expr[, colSums(dat_expr) > 0]
dat_phen <- data.frame(SampleID=gene_expr_set$SampleID, Age=gene_expr_set$Age)
mdat <- inner_join(dat_phen,
dat_expr.cln %>% t() %>% data.frame() %>% rownames_to_column("SampleID"),
by = "SampleID") %>%
tidyr::gather(key="Gene", value="TMP_Value", -c("SampleID", "Age")) %>%
mutate(Age=factor(Age, levels = age))
画图
pl <- ggplot(mdat, aes(x=Age, y=TMP_Value))+
geom_dotplot(aes(fill=Age), binaxis='y', stackdir='center', dotsize=0.9,
bins = 30)+
# stat_summary(fun.data="mean_sdl", fun.args = list(mult=1),
# geom="crossbar", width=0.5)+
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar", color="red", width=0.2)+
stat_summary(fun.y=mean, geom="point", color="black", fill="white", size=2)+
stat_summary(fun.y=mean, geom="line", group=1, linetype=1, size=0.7)+
stat_compare_means(comparisons = list(c("1", "3")),
method = "wilcox.test")+
scale_fill_manual(values = age.col)+
scale_x_discrete(breaks=age,
labels=paste0(age, "\nMonth"))+
labs(x="", y="Gene expression value (TPM)")+
facet_wrap(facets = "Gene", scales = "free")+
theme_bw()+
theme(axis.ticks.length = unit(0.2, "cm"),
axis.title = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", size = 10),
axis.text.y = element_text(size = 10, face = "bold"),
strip.text = element_text(color = 'red', face = 'bold', size = rel(1.5)),
strip.background = element_rect(colour = 'black', size = rel(2)))
pl
保存图形
ggsave("test.pdf", width = 14, height = 8, dpi = 300)
#ggsave("test.png", width = 14, height = 8, dpi = 300)
参考
参考文章如引起任何侵权问题,可以与我联系,谢谢。
网友评论