一:端粒酶蛋白质序列中氨基酸出现的频率
# Telomerase protein sequence written in one-letter amino-acid codes.
Telomerase_seq = "MPRAPRCRAVRSLLRSHYREVLPLATFVRRLGPQGWRLVQRGDPAAFRALVAQCLVCVPWDARPPPAAPSFRQVSCLKELVARVLQRLCERGAKNVLAFGFALLDGARGGPPEAFTTSVRSYLPNTVTDALRGSGAWGLLLRRVGDDVLVHLLARCALFVLVAPSCAYQVCGPPLYQLGAATQARPPPHASGPRRRLGCERAWNHSVREAGVPLGLPAPGARRGGSASRSLPLPKRPRRGAAPEPERTPVGQGSWAHPGRTRGPSDRGFCVVSPARPAEEATSLEGALSGTRHSHPSVGRQHHAGPPSTSRPPRPWDTPCPPVYAETKHFLYSSGDKEQLRPSFLLSSLRPSLTGARRLVETIFLGSRPWMPGTPRRLPRLPQRYWQMRPLFLELLGNHAQCPYGVLLKTHCPLRAAVTPAAGVCAREKPQGSVAAPEEEDTDPRRLVQLLRQHSSPWQVYGFVRACLRRLVPPGLWGSRHNERRFLRNTKKFISLGKHAKLSLQELTWKMSVRDCAWLRRSPGVGCVPAAEHRLREEILAKFLHWLMSVYVVELLRSFFYVTETTFQKNRLFFYRKSVWSKLQSIGIRQHLKRVQLRELSEAEVRQHREARPALLTSRLRFIPKPDGLRPIVNMDYVVGARTFRREKRAERLTSRVKALFSVLNYERARRPGLLGASVLGLDDIHRAWRTFVLRVRAQDPPPELYFVKVDVTGAYDTIPQDRLTEVIASIIKPQNTYCVRRYAVVQKAAHGHVRKAFKSHVSTLTDLQPYMRQFVAHLQETSPLRDAVVIEQSSSLNEASSGLFDVFLRFMCHHAVRIRGKSYVQCQGIPQGSILSTLLCSLCYGDMENKLFAGIRRDGLLLRLVDDFLLVTPHLTHAKTFLRTLVRGVPEYGCVVNLRKTVVNFPVEDEALGGTAFVQMPAHGLFPWCGLLLDTRTLEVQSDYSSYARTSIRASLTFNRGFKAGRNMRRKLFGVLRLKCHSLFLDLQVNSLQTVCTNIYKILLLQAYRFHACVLQLPFHQQVWKNPTFFLRVISDTASLCYSILKAKNAGMSLGAKGAGPLPSEAVQWLCHQAFLLKLTRHRVTYVPLLGSLRTAQTQLSRKLPGTTLTALEAAANPALPSDFKTILD"

# The 20 amino-acid letters that are counted, in the order they are reported.
AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"

# Map each amino-acid letter to the number of times it occurs in the sequence.
# str.count returns a plain int for each letter.
count_aa = {aa: Telomerase_seq.count(aa) for aa in AMINO_ACIDS}

# Highest occurrence count among all amino acids.
max_count = max(count_aa.values())

# Several amino acids may share the highest count, so report every key whose
# value equals max_count rather than only the first one.
for key, value in count_aa.items():
    if value == max_count:
        print(key, value)
L 147
## count_aa的内容
dict_items([('A', 98), ('C', 29), ('D', 34), ('E', 45), ('F', 47), ('G', 75), ('H', 34), ('I', 23), ('K', 40), ('L', 147), ('M', 12), ('N', 21), ('P', 87), ('Q', 47), ('R', 124), ('S', 75), ('T', 58), ('V', 88), ('W', 18), ('Y', 28)])
二:DNA序列中碱基出现的频率
## Count how often each nucleotide base occurs in a DNA sequence.
dna_seq = 'ATGAATGTTCAGGCTCACATGTCTGGTCAACGGTCTGGGCAGGTTCCAAACCAAGGGACA'
for n in 'ATCG':
    # str.count gives the number of occurrences of this base in the sequence.
    number = dna_seq.count(n)
    # Print the count first, then the base letter (e.g. "16 A").
    print(number, n)
16 A
13 T
14 C
17 G
三:一次一个残基地打印出氨基酸序列
# Insulin amino-acid sequence in one-letter codes.
insulin = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"

# Alternative (disabled): print a growing prefix, one more residue per line
# (M, MA, MAL, ...). The upper bound is len(insulin) + 1 so that the final
# line is the complete sequence.
#     for end in range(1, len(insulin) + 1):
#         print(insulin[:end])

# Print the sequence one residue per line.
for residue in insulin:
    print(residue)
## 第一种类似如下输出:
M
MA
MAL
MALW
MALWM
##第二种类似如下输出:
A
L
E
G
网友评论