计算Macs产生的peaks在基因组不同染色体上的分布,这里把染色体划分成10^5 bp一个bin。
open FA,"$ARGV[0]";
$/=">";
<FA>;
while(<FA>){
chomp;
my ($id,$seq)=split/\n/,$_,2;
$seq=~s/\n//g;
$seq=~s/\s//g;
$len=length $seq;
$LENGTH{$id}=$len;
#print "$id\t$len\n";
}
$/="\n";
foreach(sort keys %LENGTH){
$start=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
$end=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
$chr=$_;
$num=int($LENGTH{$_}/100000);
#print "$_\t$num\n";
$counts=$num-1;
#print "$counts\n";
for ($i=1;$i<=$counts;$i+=1){
$end+=100000;
$BIN{$n}=[$chr,$start,$end];
#print "$chr\t$start\t$end\n";
$start+=100000;
$n+=1;
}
$n+=1;
$BIN{$n}=[$chr,$end,$LENGTH{$chr}];
#print "$_\t$end\t$LENGTH{$_}\n";
}
open PEAK, "$ARGV[1]";
while(<PEAK>){
chomp;
next,if(/^#/);
next,if(/^chr/);
next,if(/^$/);
@temp=split /\t/,$_;
$chr=$temp[0];
$peak_length=$temp[3];
$peak{$temp[0]}{"$temp[1]\t$temp[2]\t$temp[3]"}=$temp[5];
}
foreach(keys %BIN){
$n=0;# VERY IMPORT, set $n =0 ,every cycle!!!!
my($chr,$peak_start,$peak_end)=@{$BIN{$_}};
#print "$chr\t$peak_start\t$peak_end\n";
foreach(keys %{$peak{$chr}}){
my ($start,$end,$peak_length)=split /\t/,$_;
#print "$start\t$end\t$peak_length\n";
my($a1,$a2,$b1,$b2)=($peak_start,$peak_end,$start,$end);
if($a1>=$b1 and $a1<=$b2 and $a2>$b1 and $a2<=$b2){
$n+=1;
}
elsif(!($a2<$b1 or $b2<$a1)){
@see=sort{$a<=>$b}($a1,$a2,$b1,$b2);
$overlalp=$see[2]-$see[1];
$ratio=$overlalp/$peak_length;
if($ratio>0){
$n+=1;
}
}
$peak_loc_chain{$chr}{"$peak_start\t$peak_end"}=$n;
}
}
foreach $CHR (sort keys %peak_loc_chain ){
foreach(sort{$a<=>$b} keys %{$peak_loc_chain{$CHR}}){
print "$CHR\t$_\t$peak_loc_chain{$CHR}{$_}\n";
}
}
代码实战
image.png
网友评论