美文网首页
perl实列,SV类型及位置提取

perl实列,SV类型及位置提取

作者: 花生学生信 | 来源:发表于2024-05-19 08:49 被阅读0次
    #!/usr/bin/perl/ -w
    use strict;
    
    ##useage:perl get.pl ref.vcf map out
    ###ref  chr1    4357    DUP00000001     G       <DUP>   1560    PASS    IMPRECISE;SVTYPE=DUP;SVMETHOD=EMBL.DELLYv1.1.6;END=16430;
    ##map     1     DEL00000015 0   27720
    
    open IN,"<$ARGV[0]";
    
    open MA,"<$ARGV[1]";
    
    open OU,">$ARGV[2]";
    
    my %MA=();
    my $pos;
    while(<MA>){
        chomp;
        my @ma=split/\t/;
        my $pos=$ma[3];
        $MA{$ma[1]}=$ma[3];
    
    
    }
    
    
    while (<IN>){
        chomp;
        my @ref=split;
        my $chr=$ref[0];
        my $str=$ref[1];
        my $id=$ref[2];
        my @end=$ref[7]=~/(\d+)/g; ###提取所有数字
        my $en=$end[5];##第四个匹配的就是end值
    
    ###不太懂为啥不可以直接写成标量而是要写数组
        my @type=$ref[7]=~ m/SVTYPE=([^;]*)/;
        my $typ=$type[0];
    
        my $len=$en-$str+1;
        if (exists $MA{$id}){   
    
        print OU "$typ\t$chr\t$str\t$en\n";
    }   
    }
    
    close MA;
    close ref;
    close OU;
    

    字符匹配实列:

    my $input = "PRECISE;SVMETHOD =Snifflesv1.0.12;CHR2=chr01;END=239620;STD_quant_start=0.912871;STD_quant_stop=2.300362;Kurtosis_quant_s tart=-1.468666;Kurtosis_quant_stop=-1.553253;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;STRANDS2=21,27,21 ,27;RE=48;REF_strand=25,31;Strandbias_pval=1;AF=0.461538";
    if ($input =~ /SVTYPE=(\w+);/) {
        my $svtype = $1;
        print "SVTYPE is: $svtype\n";
    } else {
        print "SVTYPE not found\n";
    }
    
    #!/usr/bin/perl/ -w
    use strict;
    
    ##useage:perl get.pl ref.vcf map out
    ###ref  chr1    4357    DUP00000001     G       <DUP>   1560    PASS    IMPRECISE;SVTYPE=DUP;SVMETHOD=EMBL.DELLYv1.1.6;END=16430;
    ##map     1     DEL00000015 0   27720
    
    open IN,"<$ARGV[0]";
    
    open MA,"<$ARGV[1]";
    
    open OU,">$ARGV[2]";
    
    my %MA=();
    my $pos;
    while(<MA>){
        chomp;
        my @ma=split/\t/;
        my $pos=$ma[3];
        $MA{$ma[1]}=$ma[3];
    
    
    }
    
    
    while (<IN>){
        chomp;
        my @ref=split;
        my $chr=$ref[0];
        my $str=$ref[1];
        my $id=$ref[2];
        my @end=$ref[7]=~/(\d+)/g; ###提取所有数字
    #    my $en=$end[5];##第四个匹配的就是end值
    
    ###对于sniffles的BND,改成通用的
    #   my @end=ref[7]=~m/SVLEN=([^(\d+);])*/;
        my $en=$end[14]+$str;  ##第15个数字
    
    
        my @type=$ref[7]=~ m/SVTYPE=([^;]*)/;
        my $typ=$type[0];
    
    #    my $len=$en-$str+1;
        if (exists $MA{$id}){   
    
        print OU "$typ\t$chr\t$str\t$en\n";
    }   
    }
    
    close MA;
    close ref;
    close OU;
    
    sniffles结果,后面的是BND,实际上是影响2条染色体
    
    
    vcftools --vcf LTH.vcf --plink --out LTH
    
    perl get.pl LTH.vcf LTH.map out
    
    bedtools intersect -a NIP_bed  -b out -loj >LTH
    

    过滤

    cat LTH.vcf |perl -lane 'if(/^#/){print}elsif(@F[6] eq "PASS"){print}' > pass/LTH.pass.vcf
    
    

    相关文章

      网友评论

          本文标题:perl实列,SV类型及位置提取

          本文链接:https://www.haomeiwen.com/subject/vymgfjtx.html