#从gene_info中的到不同指标、不同步长的染色体上的SNP分布
get_distrubution_from_geneinfo<-function(gene_info,filter_key=list("Sample1wt.index==F","Sample2wt.index==F","Sample3wt.index==F"),step=2e6,plot_dir="./"){
require(patchwork)
require(stringr)
require(reshape2)
require(ggplot2)
require(Hmisc)
require(patchwork)
data("human_karyotype")
step=step
test<-lapply(filter_key,function(x){
eval(parse(text = paste0("subset(gene_info,",x,")")))
})
test<-lapply(test,function(x){
data.frame(SNPid=paste("SNP",1:nrow(x),sep = "_"),
Chr=str_split(x$loc,"_",simplify = T)[,1],
position=str_split(x$loc,"_",simplify = T)[,2])
})
#使用ggplot绘图
#1、对染色体进行分区
chrom_region<-apply(human_karyotype,1,function(x){
a=seq(from=1,
to=x[3],
by=step)
a[length(a)+1]=x[3]
return(a)
})
names(chrom_region)<-paste("chr",human_karyotype$Chr,sep = "")
#2、统计分区内的snp数目
test<-lapply(test,function(x){
x<-split(x,x$Chr)
x<-x[names(chrom_region)]
})
sampleinfo_stat<-lapply(test,function(x){
m=lapply(1:length(x),function(y){
a=as.data.frame(table(cut(as.numeric(as.character(x[[y]]$position)),breaks = as.numeric(chrom_region[[y]]))))
a[,1]<-chrom_region[[y]][-which(chrom_region[[y]]==max(chrom_region[[y]]))]
return(a)
})
names(m)=names(chrom_region)
return(m)
})
#3、使用ggplot可视化
for (chr_id in names(chrom_region)) {
if(T){
SNP_samples_stat<-do.call(rbind,lapply(sampleinfo_stat,function(x){x[[chr_id]]}))
SNP_samples_stat$group=paste("Condition",rep(c(1:length(filter_key)),each=nrow(SNP_samples_stat)/length(filter_key)),sep = "_")
SNP_samples_stat$Var1<-factor(SNP_samples_stat$Var1,levels = unique(as.numeric(SNP_samples_stat$Var1)))
p1<-ggplot(SNP_samples_stat,aes(x=Var1,y=Freq,group=group,color=group))+
geom_point()+geom_line()+
theme(axis.text.x = element_text(angle = 90))+
labs(title=capitalize(chr_id))
p2<-ggplot(SNP_samples_stat,aes(x=Var1,y=group))+
geom_tile(aes(fill=Freq),color="white")+
scale_fill_gradient(low = "white",high = "red")+
theme(axis.text.x = element_text(angle = 90))
p3=p1/p2
ggsave(paste(plot_dir,chr_id,".png",sep = ""),plot =p3,width = 15,height = 10 )
}
}
}
get_distrubution_from_geneinfo(gene_info)

image.png
网友评论