美文网首页生物信息学习
fastq2fasta 转换小脚本

fastq2fasta 转换小脚本

作者: 正踪大米饭儿 | 来源:发表于2017-12-08 16:07 被阅读27次

    一个小工具,主要针对 小RNA 数据操作使用,将 fastq reads 转换为 fasta 。

    #!/usr/bin/perl -w
    use strict;
    use Getopt::Long;
    use File::Basename;
    
    my ( $fastq, $out, $pre, $type, $help );
    
    GetOptions(
        "fq=s"   => \$fastq,
        "out:s"  => \$out,
        "pre:s"  => \$pre,
        "type:s" => \$type,
        "help|h!"=> \$help
    );
    
    die &usage if (!defined $fastq || defined $help );
    
    $out ||= "Result/out"; $pre ||= "seq";
    $type ||= "1";
    my $outdir = dirname( $out );
    system ("mkdir -p $outdir");
    
    ## convert fastq to fasta and remove redundancy reads
    ## seq_id: seq_0000001_x345 
    
    my ($seq, $total, $unique ) = &reads_counter($fastq);
    my $fo = scalar ( length ($unique) ); 
    my $ff = "%0".$fo."d";
    my $i = 0;
    
    open O1, ">$out.convert.fa" || die $!;
    foreach my $k ( keys %$seq ){
    
        $i++;
        my $cnt = $seq->{$k};
        my $id;
    
        if ($type == 1){
            $id = $pre."_".sprintf ("$ff", $i)."_x$cnt";
        } elsif ($type == 2){
            $id = $pre.sprintf("$ff",$i)."\t$cnt";
        } else {
            print STDERR "ERROR! -type options must be 1 or 2.";
            print STDERR "1 for [{$pre}xxx_00002_x345] 2 for [{$pre}00002\t345]\n";
            exit;
        }
    
        print O1 ">$id\n$k\n";
    }
    close O1;
    
    open STAT, ">$out.Reads.stat" || die $!;
    print STAT "total_reads\tunique_reads\n";
    print STAT "$total\t$unique\n";
    close STAT;
    
    ## =============================== SUB MODULE ============================ ##
    
    sub reads_counter{
        
        use PerlIO::gzip;
        my $infile = shift @_;
        
        if ( $infile =~ /\.gz$/ ) {
            open FQ, "<gzip:",$infile;
        } else {
            open FQ, "<$infile";
        }
    
        my $line = 0;
        my $total_reads = 0;
        my %seq;
    
        while (<FQ>){
            chomp;
            $line++;
            if ($line == 2){
                $total_reads++;
                $seq{$_}++;
            } elsif ($line == 4){
                $line = 0;
            } else {
                next;
            }
        }
        close FQ;
    
        my $unique_reads = scalar ( keys %seq );
    
        return ( \%seq, $total_reads, $unique_reads );
    }
    
    
    sub usage{
    
        my $name = basename($0);
    
        print STDERR <<USAGE;
    ===============================================================================
    Name:
        $name
    
    Usage:
        perl $name [options]
    
    Options:
        -fq      input fastq [.fq|.fastq|.fq.gz|.fastq.gz]
        -out     out put [defualt: out.fa]
        -pre     prefix of sequence id.[defualt: seq]
        -type    type for out fomat.[1 or 2, defualt:1]
                 1: xxx_000002_x234  2: xxx00002  234.
        --help   print this help information.
        -h
    
    e.g:
        perl $name -fq tomA.fq.gz -out Result/out.fa -pre seq
    
    ===============================================================================
    USAGE
        exit 1;
    }
    
    __END__
    
    Author :    Liupeng@genebang.com
    Date   :    2017-06-04
    
    

    相关文章

      网友评论

        本文标题:fastq2fasta 转换小脚本

        本文链接:https://www.haomeiwen.com/subject/brgeixtx.html