# Count proteins per subcellular location, binned by PTM site number.
#
# Usage: perl <this script> <input.tsv> <output.txt>
#
# Input:  tab-separated lines with three columns: location, protein ID,
#         number of PTM sites for that protein.
# Output: three sections, each introduced by a line of dashes, holding
#         "location<TAB>count" lines for proteins with
#           1) exactly 1 site,
#           2) more than 1 and at most 5 sites,
#           3) more than 5 sites.
#         Locations inside a section are written in sorted order so that
#         repeated runs produce identical files.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV >= 2 or die "Usage: $0 <input.tsv> <output.txt>\n";
my ($in_path, $out_path) = @ARGV;

# One counter per bin: location => number of proteins.
my (%count_single, %count_2_to_5, %count_over_5);

open my $in_fh, '<', $in_path
    or die "Cannot open '$in_path' for reading: $!\n";

while (my $line = <$in_fh>) {
    # Strip the line terminator, tolerating Windows-style CRLF.
    $line =~ s/\r?\n\z//;

    # The middle column (protein ID) is not needed for counting.
    my ($sub, undef, $site_num) = split /\t/, $line;

    # Skip blank, truncated, or header lines: no numeric site count.
    next unless defined $site_num && looks_like_number($site_num);

    if ($site_num == 1) {
        # Proteins with exactly one PTM site.
        $count_single{$sub}++;
    }
    elsif ($site_num > 1 and $site_num <= 5) {
        # Proteins with more than 1 and up to (including) 5 PTM sites.
        $count_2_to_5{$sub}++;
    }
    elsif ($site_num > 5) {
        # Proteins with more than 5 PTM sites.
        $count_over_5{$sub}++;
    }
    # Values below 1 fall in no bin and are ignored.
}
close $in_fh or die "Error while reading '$in_path': $!\n";

open my $out_fh, '>', $out_path
    or die "Cannot open '$out_path' for writing: $!\n";

# Write the bins in a fixed order; each section starts with a separator.
for my $count_for (\%count_single, \%count_2_to_5, \%count_over_5) {
    print {$out_fh} "----------------------------------------\n";
    for my $location (sort keys %{$count_for}) {
        print {$out_fh} "$location\t$count_for->{$location}\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Error while writing '$out_path': $!\n";
Input file (tab-separated; columns: Sub, Uniprot_ID, PTM_site_number_per_protein)
cytoplasm Q6K461 1
plasma membrane Q6ER91 1
chloroplast Q0JBV4 1
nucleus Q8LR61 1
cytoplasm Q40710 1
cytoplasm Q67J05 1
...
vacuolar membrane Q84TX6 7
chloroplast Q5NBT9 7
chloroplast Q8W317 7
extracellular Q339K4 7
cytoplasm A0A0P0W4W6 8
chloroplast P0C361 8
plasma membrane Q10LT8 9
plasma membrane B9G2A8 11
Output file
----------------------------------------
peroxisome 13
nucleus 323
chloroplast 722
"chloroplast,mitochondria" 2
"nucleus,plasma membrane" 1
"cytoplasm,plasma membrane" 1
extracellular 102
endoplasmic reticulum 19
Golgi apparatus 5
cytoplasm 416
cytoskeleton 21
vacuolar membrane 32
"cytoplasm,nucleus" 2
mitochondria 95
plasma membrane 122
----------------------------------------
endoplasmic reticulum 7
extracellular 44
peroxisome 6
nucleus 116
"chloroplast,mitochondria" 2
chloroplast 403
mitochondria 32
"cytoplasm,nucleus" 2
plasma membrane 45
vacuolar membrane 24
Golgi apparatus 2
cytoskeleton 14
cytoplasm 213
----------------------------------------
plasma membrane 3
extracellular 2
vacuolar membrane 1
chloroplast 7
cytoplasm 8
nucleus 1
网友评论