peaks 密度图

作者: 余绕 | 来源:发表于2020-05-31 22:13 被阅读0次

计算Macs产生的peaks在基因组不同染色体上的分布,这里把染色体划分成10^5 bp一个bin。

open FA,"$ARGV[0]";

$/=">";
<FA>;
while(<FA>){
chomp;
my ($id,$seq)=split/\n/,$_,2;

$seq=~s/\n//g;
$seq=~s/\s//g;

$len=length $seq;
$LENGTH{$id}=$len;
#print "$id\t$len\n";

}

$/="\n";

foreach(sort keys %LENGTH){
    $start=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
    $end=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
    $chr=$_;
    $num=int($LENGTH{$_}/100000);
    #print "$_\t$num\n";
    $counts=$num-1;
    #print "$counts\n"; 
    for ($i=1;$i<=$counts;$i+=1){
    $end+=100000;
    $BIN{$n}=[$chr,$start,$end];
    #print "$chr\t$start\t$end\n";
    $start+=100000;
    $n+=1;
    }
     $n+=1;
$BIN{$n}=[$chr,$end,$LENGTH{$chr}];
#print "$_\t$end\t$LENGTH{$_}\n";
}


open PEAK, "$ARGV[1]";

while(<PEAK>){
chomp;
next,if(/^#/);
next,if(/^chr/);
next,if(/^$/);

@temp=split /\t/,$_;
$chr=$temp[0];
$peak_length=$temp[3];
$peak{$temp[0]}{"$temp[1]\t$temp[2]\t$temp[3]"}=$temp[5];
}


foreach(keys %BIN){
$n=0;# VERY IMPORT, set $n =0 ,every cycle!!!!
my($chr,$peak_start,$peak_end)=@{$BIN{$_}};
    #print "$chr\t$peak_start\t$peak_end\n";
    foreach(keys %{$peak{$chr}}){
        my ($start,$end,$peak_length)=split /\t/,$_;
        #print "$start\t$end\t$peak_length\n";
        my($a1,$a2,$b1,$b2)=($peak_start,$peak_end,$start,$end);
        
        if($a1>=$b1 and $a1<=$b2 and $a2>$b1 and $a2<=$b2){
        $n+=1;
        }
        elsif(!($a2<$b1 or $b2<$a1)){
            @see=sort{$a<=>$b}($a1,$a2,$b1,$b2);
            $overlalp=$see[2]-$see[1];
            $ratio=$overlalp/$peak_length;
                if($ratio>0){
                    $n+=1;
        }
    
    }
    $peak_loc_chain{$chr}{"$peak_start\t$peak_end"}=$n;
    
    }


}

foreach $CHR (sort keys %peak_loc_chain ){
    foreach(sort{$a<=>$b} keys %{$peak_loc_chain{$CHR}}){
    print "$CHR\t$_\t$peak_loc_chain{$CHR}{$_}\n";
    }   
}

代码实战


image.png

相关文章

网友评论

    本文标题:peaks 密度图

    本文链接:https://www.haomeiwen.com/subject/bccdzhtx.html