美文网首页
利用perl脚本统计标记的信息

利用perl脚本统计标记的信息

作者: qujingtao | 来源:发表于2022-07-20 13:47 被阅读0次

    输入数据格式为:


    输入文件示例

    利用该脚本,可以获得标记的等位信息、杂合率、MAF、缺失率及PIC值。

    perl stat_info_marker.pl example.txt
    
    #!/usr/bin/perl
    
    use warnings;
    use strict;
    
    open IN,"$ARGV[0]";
    open OUT,">marker-info-$ARGV[0]";
    print OUT "Name\tAlleles\tHeterogeneity\tMAF\tMissing\tPIC\n";
    <IN>;
    while (<IN>) {
        chomp;
        my @a = split/\s+/;
        print OUT "$a[0]\t",stat_alleles(@a[1..$#a]),"\t",stat_het(@a[1..$#a]),"\t",stat_maf(@a[1..$#a]),"\t",stat_missing(@a[1..$#a]),"\t",stat_pic(@a[1..$#a]),"\n";
    
    }
    close IN;
    close OUT;
    
    sub stat_alleles {
        my %alleles = ();
        foreach  (@_) {
            s/://;
            /(\w)(\w)/;
            next if $1 =~ /N/;
            $alleles{$1}++;
            $alleles{$2}++;
        }
        return join("/",sort keys %alleles);
    }
    
    
    sub stat_missing {
        my $a = 0;
        my $b = 0;
        foreach  (@_) {
            s/://;
            /(\w)(\w)/;
            if ($1 =~ /N/) {
                $a++;
            }
            $b++;
        }
        if ($b > 0) {
            return $a/$b;
        }else{
            return "NA";
        }
    }
    
    sub stat_maf {
        my $t = 0;
        my %maf = ();
        foreach  (@_) {
            s/://;
            /(\w)(\w)/;
            if ($1 =~ /N/) {
                next;
            } else{
                $maf{$1}++;
                $maf{$2}++;
                $t += 2;
            }
        }
        my @return = sort{$maf{$b}<=>$maf{$a}}keys %maf;
        if ($#return >= 1 && $t > 0) {
            return $maf{$return[1]}/$t;
        }else{
            return "NA";
        }
    }
    
    sub stat_pic {
        my $a = 0;
        my $b = 0;
        my %pic = ();
        foreach  (@_) {
            s/://;
            /(\w)(\w)/;
            if ($1 =~ /N/) {
                next;
            } else{
                $pic{$1}++;
                $pic{$2}++;
                $a += 2;
            }
        }
        if ($a > 0) {
            foreach  (values %pic) {
                $b += (($_/$a)*($_/$a));
            }
            return 1-$b;
        } else{
            return "NA";
        }
    }
    
    
    sub stat_het {
        my $a = 0;
        my $b = 0;
        foreach  (@_) {
            s/://;
            /(\w)(\w)/;
            if ($1 =~ /N/) {
                next;
            } elsif ($1 ne $2) {
                $a++;
            } else {
                $b++;
            }
        }
        if ($a+$b > 0) {
            return $a/($a+$b);
        }else{
            return "NA";
        }
    }
    
    
    

    相关文章

      网友评论

          本文标题:利用perl脚本统计标记的信息

          本文链接:https://www.haomeiwen.com/subject/cmweirtx.html