下面这个脚本用于把基因型数据转换成运行 Structure 软件所需要的输入格式:
#!/usr/bin/perl -w
# Convert a tab-delimited genotype matrix into a two-rows-per-sample table
# (the layout this script produces as input for the STRUCTURE program).
#
# Usage:  perl script.pl <infile.txt>  > outfile.txt
#
# Input, as read by this script:
#   line 1      : header; field 0 is ignored, fields 1..n are sample names
#   other lines : field 0 is the locus name, field i is the genotype string
#                 of the sample named in header field i
# Each genotype string is split into single characters; the first character
# becomes the sample's first-row allele and the second character becomes
# its second-row allele.
#
# Output (STDOUT):
#   "ID" followed by the sorted, de-duplicated locus names, then two lines
#   per sample (samples sorted by name), one allele per locus on each line.
#   An allele that is 'N', or that is absent altogether, is written as -9.
use strict;
use warnings;

# Parse the genotype table from an already-open filehandle.
# Returns two references:
#   \@loci        sorted unique locus names
#   \%alleles_of  $alleles_of{sample}{locus} = [ characters of the genotype ]
sub read_genotype_table {
    my ($in) = @_;
    my (%alleles_of, %seen_locus);

    my $header = <$in>;
    return ([], \%alleles_of) unless defined $header;    # empty input
    $header =~ s/\r?\n\z//;        # strip LF or CRLF line endings
    my @samples = split /\t/, $header;

    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;
        next unless $line =~ /\S/;    # ignore blank lines

        # Limit -1 keeps trailing empty cells, so a sample whose genotype
        # cell is empty is still recorded (and later reported as missing).
        my ($locus, @genotypes) = split /\t/, $line, -1;
        $seen_locus{$locus} = 1;

        for my $col (0 .. $#genotypes) {
            my $sample = $samples[ $col + 1 ];
            next unless defined $sample;    # cell has no header column
            $alleles_of{$sample}{$locus} = [ split //, $genotypes[$col] ];
        }
    }

    my @loci = sort keys %seen_locus;
    return (\@loci, \%alleles_of);
}

# Map one allele character to its output code: -9 for a missing allele
# (undefined, or the literal 'N'), otherwise the character itself.
sub structure_code {
    my ($allele) = @_;
    return -9 if !defined $allele || $allele eq 'N';
    return $allele;
}

# Build the output lines (without trailing newlines) from the parsed table.
# The same locus list drives the header and every sample row, so columns
# stay aligned even when a sample has no genotype for some locus.
sub structure_lines {
    my ($loci, $alleles_of) = @_;
    my @lines = ( join("\t", 'ID', @{$loci}) );

    for my $sample (sort keys %{$alleles_of}) {
        my (@first_row, @second_row);
        for my $locus (@{$loci}) {
            my $pair = $alleles_of->{$sample}{$locus} || [];
            push @first_row,  structure_code($pair->[0]);
            push @second_row, structure_code($pair->[1]);
        }
        push @lines,
            join("\t", $sample, @first_row),
            join("\t", $sample, @second_row);
    }
    return @lines;
}

# Script entry point: read the file named by the first argument and print
# the converted table to STDOUT. Dies with a usage message when no file is
# given, or with the system error when the file cannot be opened.
sub main {
    my @args = @_;
    die "Usage: perl $0 <infile.txt> STDOUT\n" if @args < 1;

    # Three-argument open on a lexical handle; the low-precedence "or" makes
    # the failure check apply to open() itself, not to the file name.
    open my $in, '<', $args[0]
        or die "Cannot open $args[0]: $!\n";
    my ($loci, $alleles_of) = read_genotype_table($in);
    close $in;

    print "$_\n" for structure_lines($loci, $alleles_of);
    return;
}

# Run the conversion only when executed directly, so the subs above can be
# loaded from another file without reading @ARGV.
main(@ARGV) unless caller;
__END__
如有问题,欢迎讨论!
网友评论