open BED, "$ARGV[0]";
while(<BED>){
chomp;
@temp=split /\t/,$_;
$chr=$temp[0];
$hash_bed{$temp[3]}=[$temp[0],$temp[1],$temp[2]];
}
open FA,"$ARGV[1]";
while(<FA>){
chomp;
@temp=split /\t/,$_;
@matchM=$temp[5]=~m/(\d*)M/g; #Put the matched values in an array!
@matchD=$temp[5]=~m/(\d*)D/g;
@MATCH=(@matchM,@matchD);
$match=0;
foreach(@MATCH){
$match+=$_;
}
$left=$temp[3]-1;
$right=$left+$match;
$hash_bam{$temp[2]}{"$left\t$right"}=1;
}
foreach(keys %hash_bed){
my $n=0; #非常重要,每次循环清零,不然连续累加
my $loc=$_;
#$array=$hash_bed{$_};
my($chr,$gene_start,$gene_end)=@{$hash_bed{$_}};
#print "$chr\t$gene_start\t$gene_end\n";
foreach(keys %{$hash_bam{$chr}}){
my($peak_start,$peak_end)=split /\t/,$_;
$peak_len=$peak_end-$peak_start+1;
#print "$peak_start\t$peak_end\t$peak_len\n";
my ($a1,$a2,$b1,$b2)=($gene_start,$gene_end,$peak_start,$peak_end);
if($a1>=$b1 and $a1<=$b2 and $a2>$b1 and $a2<=$b2){
$n+=1;
}
elsif(!($a2<$b1 or $b2<$a1)){
@see=sort{$a<=>$b}($a1,$a2,$b1,$b2);
$overlalp=$see[2]-$see[1];
$ratio=$overlalp/$peak_len;
if($ratio>0){
$n+=1;
}
}
$peak_loc_chain{$chr}{"$gene_start\t$gene_end"}="$loc\t$n";
}
}
foreach $CHR(sort keys %peak_loc_chain){
foreach(sort keys %{$peak_loc_chain{$CHR}}){
print "$CHR\t$_\t$peak_loc_chain{$CHR}{$_}\n";
}
}
code 练习
image.png
网友评论