1 read tree
library("ggtree")
library("ggplot2")
library("ggstance") # for geom_barh
library("reshape2") # for melt
# Load the phylogenetic tree from a Newick file in the working directory.
tree <- read.tree(file = "taxon_fig.nwk")

# Data-frame view of the tree produced by fortify(); `tb` is not referenced
# by the plotting code visible below, so it is presumably kept for inspection.
tb <- fortify(tree)

2 plot tree
2.1 配色
# Phyla in display order. This vector is used both as the factor levels for
# phylum$Phylum and as the names of the colour vector built below, so the
# i-th phylum receives the i-th colour of the palette file.
p_sort <- c(
  "Firmicutes",
  "Proteobacteria",
  "Actinobacteria",
  "Bacteroidetes",
  "Euryarchaeota",
  "Synergistetes",
  "Tenericutes",
  "Deinococcus_Thermus",
  "Fusobacteria",
  "Spirochaetes",
  "Acidobacteria",
  "Cyanobacteria",
  "Chlamydiae",
  "Chlorobi",
  "Deferribacteres",
  "Planctomycetes",
  "Thermodesulfobacteria",
  "Verrucomicrobia"
)

# Palette file: headerless, tab-separated; the first column (V1) holds the
# colours.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another machine.
# quote = "" and comment.char = "" turn off quote and comment parsing,
# presumably so colour codes starting with "#" are read verbatim -- confirm
# against the file contents.
col_list <- read.table(
  "C:/Users/hutongyuan/Desktop/group_color.list",
  sep = "\t",
  check.names = FALSE,
  na.strings = "",
  stringsAsFactors = FALSE,
  quote = "",
  comment.char = ""
)

# Fail early if the palette is shorter than the phylum list; otherwise the
# missing entries would silently become NA colours.
stopifnot(nrow(col_list) >= length(p_sort))

# Take exactly one colour per phylum and name it so that
# scale_color_manual() can match colours to phyla by name.
colors <- col_list$V1[seq_along(p_sort)]
names(colors) <- p_sort

2.2 输入文件及处理
# Tip annotation table (tab-separated, with header); must contain a Phylum
# column. It is attached to the tree with %<+% when the base plot is built.
phylum <- read.table("data_phylum.txt", header = TRUE, sep = "\t")

# factor() turns any value not listed in p_sort into NA without complaint,
# which would leave those tips uncoloured; surface that as a warning.
if (!all(phylum$Phylum %in% c(p_sort, NA))) {
  warning(
    "data_phylum.txt contains phyla that are not listed in p_sort; ",
    "they will become NA",
    call. = FALSE
  )
}

# Fix the level order to p_sort so it matches the named colour vector.
phylum$Phylum <- factor(phylum$Phylum, levels = p_sort)

2.3 ggtree
# Base tree plot: rectangular layout, with the phylum table attached to the
# tree data via %<+% and tip points coloured by Phylum using the named
# `colors` palette.
base <- ggtree(tree, color = "black", layout = "rectangular", size = 0.5) %<+%
  phylum +
  geom_tippoint(aes(color = as.factor(Phylum)), size = 6) +
  theme(
    legend.title = element_text(face = "bold", size = 20),
    legend.position = "right",
    legend.text = element_text(size = 15),
    # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size`
    # for element_rect(); kept as `size` to stay compatible with the
    # ggplot2 version this script was written against.
    legend.key = element_rect(size = 20)
  ) +
  labs(color = "Phylum") +
  scale_color_manual(values = colors)

# Name both arguments explicitly: the previous call passed the plot first and
# relied on `file` partially matching `filename`.
ggsave(filename = "tree_base.pdf", plot = base)

3 add box
3.1 输入文件及处理
# BGI cohort table (tab-separated, with header); must contain a Genus column.
bgi <- read.table("data_bgi.txt", header = TRUE, sep = "\t")

# Reshape to long format keyed by Genus. `id.vars` is spelled out in full
# instead of relying on partial matching of `id`. The resulting `value`
# column is what the box-plot panel maps to x.
bgi <- melt(bgi, id.vars = "Genus")

3.2 facet_plot geom_boxploth
# Add a "BGI cohort" panel next to the tree, drawing one horizontal box plot
# per tip label and colouring the boxes by Phylum.
add_box <- facet_plot(
  base,
  panel = "BGI cohort",
  data = bgi,
  geom = geom_boxploth,
  # value * 100 presumably rescales a fraction to a percentage -- confirm
  # against data_bgi.txt.
  mapping = aes(x = value * 100, group = label, color = Phylum),
  outlier.size = 0.02,
  size = 0.2
)

4 add stackplot
需要的是分组的bar图,但需要通过mapping上色,因此自行构造stackplot(把不需要的颜色值设为0即可)。分组不多时手动用Excel整理即可;分组太多的话可以用dcast,不完全矩阵会产生NA,把NA值设为0即可。其实不用mapping也行,即在aes外用color/fill上色,但这需要明确的树结构信息,搞不清树结构者勿入。
4.1 输入数据及处理
# Source/group table (tab-separated, with header); must contain a Genus
# column.
group <- read.table("data_group.txt", header = TRUE, sep = "\t")

# Reshape to long format keyed by Genus. `id.vars` is spelled out in full
# instead of relying on partial matching of `id`. The resulting `variable`
# and `value` columns are mapped to fill and x in the "Source" panel.
group <- melt(group, id.vars = "Genus")

4.2 facet_plot geom_barh
# Add a "Source" panel of horizontal bars, filled by the melted `variable`
# column, and give each source level a fixed fill colour.
add_group <- facet_plot(
  add_box,
  panel = 'Source',
  data = group,
  geom = geom_barh,
  mapping = aes(x = value, fill = variable),
  width = 1,
  stat = 'identity'
) +
  theme_tree2() +
  labs(fill = "Source") +
  scale_fill_manual(
    values = c(
      "Share" = "green",
      "CGR2" = "indianred3",
      "BGI_cohort" = "deepskyblue3"
    )
  )

5 add bar
目前愚笨的办法是用mapping match color,但是会有0值杂色。最简单的办法是干脆不上色,最好的方法是解析树结构,目前只解析了一半吧,以后邂逅同样的问题再捯饬。
5.1 输入数据和处理
# CGR2 table (tab-separated, with header).
# NOTE(review): unlike the bgi and group tables this one is not melted, yet
# the "CGR2" panel maps `value` and `variable` -- presumably the file is
# already in long format; confirm against data_cgr2.txt.
cgr2 <- read.table("data_cgr2.txt", header = TRUE, sep = "\t")
我试图解析树结构,但目前的结果是错的

5.2 facet_plot geom_barh
# Add a "CGR2" panel of horizontal bars without outlines (color = NA), filled
# by `variable`. It shares the single fill scale already defined on the plot
# it extends.
add_bar <- facet_plot(
  add_group,
  panel = 'CGR2',
  data = cgr2,
  geom = geom_barh,
  # value * 100 presumably rescales a fraction to a percentage -- confirm
  # against data_cgr2.txt.
  mapping = aes(x = value * 100, fill = variable),
  color = NA,
  width = 1,
  stat = 'identity'
) +
  theme_tree2()

如图见,下面的颜色对应是错的
清晰地认识到自己R基本功的不足,
1 一个图,即使是这里的组合图,也只能有一套color_scale和一套fill_scale
2 facet_plot的基本参数outlier.size,还是惊扰了神驾
3 width size 等基本参数不在库中
此处,非常感谢Y叔(Prof. 余光创)的答疑。
ggtree facet_plot 每个美图狗必备技能:
facet_plot: a general solution to associate data with phylogenetic tree
网友评论