peaks 密度图

作者: 余绕 | 来源:发表于2020-05-31 22:13 被阅读0次

    计算Macs产生的peaks在基因组不同染色体上的分布,这里把染色体划分成10^5 bp一个bin。

    open FA,"$ARGV[0]";
    
    $/=">";
    <FA>;
    while(<FA>){
    chomp;
    my ($id,$seq)=split/\n/,$_,2;
    
    $seq=~s/\n//g;
    $seq=~s/\s//g;
    
    $len=length $seq;
    $LENGTH{$id}=$len;
    #print "$id\t$len\n";
    
    }
    
    $/="\n";
    
    foreach(sort keys %LENGTH){
        $start=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
        $end=0;#VERY IMPORT, set $n =0 ,every cycle!!!!
        $chr=$_;
        $num=int($LENGTH{$_}/100000);
        #print "$_\t$num\n";
        $counts=$num-1;
        #print "$counts\n"; 
        for ($i=1;$i<=$counts;$i+=1){
        $end+=100000;
        $BIN{$n}=[$chr,$start,$end];
        #print "$chr\t$start\t$end\n";
        $start+=100000;
        $n+=1;
        }
         $n+=1;
    $BIN{$n}=[$chr,$end,$LENGTH{$chr}];
    #print "$_\t$end\t$LENGTH{$_}\n";
    }
    
    
    open PEAK, "$ARGV[1]";
    
    while(<PEAK>){
    chomp;
    next,if(/^#/);
    next,if(/^chr/);
    next,if(/^$/);
    
    @temp=split /\t/,$_;
    $chr=$temp[0];
    $peak_length=$temp[3];
    $peak{$temp[0]}{"$temp[1]\t$temp[2]\t$temp[3]"}=$temp[5];
    }
    
    
    foreach(keys %BIN){
    $n=0;# VERY IMPORT, set $n =0 ,every cycle!!!!
    my($chr,$peak_start,$peak_end)=@{$BIN{$_}};
        #print "$chr\t$peak_start\t$peak_end\n";
        foreach(keys %{$peak{$chr}}){
            my ($start,$end,$peak_length)=split /\t/,$_;
            #print "$start\t$end\t$peak_length\n";
            my($a1,$a2,$b1,$b2)=($peak_start,$peak_end,$start,$end);
            
            if($a1>=$b1 and $a1<=$b2 and $a2>$b1 and $a2<=$b2){
            $n+=1;
            }
            elsif(!($a2<$b1 or $b2<$a1)){
                @see=sort{$a<=>$b}($a1,$a2,$b1,$b2);
                $overlalp=$see[2]-$see[1];
                $ratio=$overlalp/$peak_length;
                    if($ratio>0){
                        $n+=1;
            }
        
        }
        $peak_loc_chain{$chr}{"$peak_start\t$peak_end"}=$n;
        
        }
    
    
    }
    
    foreach $CHR (sort keys %peak_loc_chain ){
        foreach(sort{$a<=>$b} keys %{$peak_loc_chain{$CHR}}){
        print "$CHR\t$_\t$peak_loc_chain{$CHR}{$_}\n";
        }   
    }
    
    

    代码实战


    image.png

    相关文章

      网友评论

        本文标题:peaks 密度图

        本文链接:https://www.haomeiwen.com/subject/bccdzhtx.html