一个小工具,主要针对 小RNA 数据操作使用,将 fastq reads 转换为 fasta 。
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use File::Basename;
my ( $fastq, $out, $pre, $type, $help );
GetOptions(
"fq=s" => \$fastq,
"out:s" => \$out,
"pre:s" => \$pre,
"type:s" => \$type,
"help|h!"=> \$help
);
die &usage if (!defined $fastq || defined $help );
$out ||= "Result/out"; $pre ||= "seq";
$type ||= "1";
my $outdir = dirname( $out );
system ("mkdir -p $outdir");
## convert fastq to fasta and remove redundancy reads
## seq_id: seq_0000001_x345
my ($seq, $total, $unique ) = &reads_counter($fastq);
my $fo = scalar ( length ($unique) );
my $ff = "%0".$fo."d";
my $i = 0;
open O1, ">$out.convert.fa" || die $!;
foreach my $k ( keys %$seq ){
$i++;
my $cnt = $seq->{$k};
my $id;
if ($type == 1){
$id = $pre."_".sprintf ("$ff", $i)."_x$cnt";
} elsif ($type == 2){
$id = $pre.sprintf("$ff",$i)."\t$cnt";
} else {
print STDERR "ERROR! -type options must be 1 or 2.";
print STDERR "1 for [{$pre}xxx_00002_x345] 2 for [{$pre}00002\t345]\n";
exit;
}
print O1 ">$id\n$k\n";
}
close O1;
open STAT, ">$out.Reads.stat" || die $!;
print STAT "total_reads\tunique_reads\n";
print STAT "$total\t$unique\n";
close STAT;
## =============================== SUB MODULE ============================ ##
sub reads_counter{
use PerlIO::gzip;
my $infile = shift @_;
if ( $infile =~ /\.gz$/ ) {
open FQ, "<gzip:",$infile;
} else {
open FQ, "<$infile";
}
my $line = 0;
my $total_reads = 0;
my %seq;
while (<FQ>){
chomp;
$line++;
if ($line == 2){
$total_reads++;
$seq{$_}++;
} elsif ($line == 4){
$line = 0;
} else {
next;
}
}
close FQ;
my $unique_reads = scalar ( keys %seq );
return ( \%seq, $total_reads, $unique_reads );
}
sub usage{
my $name = basename($0);
print STDERR <<USAGE;
===============================================================================
Name:
$name
Usage:
perl $name [options]
Options:
-fq input fastq [.fq|.fastq|.fq.gz|.fastq.gz]
-out out put [defualt: out.fa]
-pre prefix of sequence id.[defualt: seq]
-type type for out fomat.[1 or 2, defualt:1]
1: xxx_000002_x234 2: xxx00002 234.
--help print this help information.
-h
e.g:
perl $name -fq tomA.fq.gz -out Result/out.fa -pre seq
===============================================================================
USAGE
exit 1;
}
__END__
Author : Liupeng@genebang.com
Date : 2017-06-04
网友评论