将gbk格式文件转换成fasta格式文件
def gbk_to_fasta(gbk_path, fasta_path):
    """Convert a GenBank flat file to FASTA.

    The second whitespace-separated field of every line starting with
    ``ACCESSION`` becomes a ``>`` header line. Every non-empty line between
    an ``ORIGIN`` line and the ``//`` record terminator is treated as
    sequence: its first field (the position counter) is dropped, the
    remaining fields are joined, upper-cased and written as one line.

    Args:
        gbk_path: Path of the GenBank file to read.
        fasta_path: Path of the FASTA file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
        IndexError: If an ``ACCESSION`` line carries no accession value.
    """
    in_sequence = False
    # Both handles live in one ``with`` so they are closed even on error.
    with open(gbk_path, 'r') as gbk, open(fasta_path, 'w') as fasta:
        for line in gbk:
            if line.startswith('ACCESSION'):
                fasta.write('>' + line.split()[1] + '\n')
            elif line.startswith('ORIGIN'):
                in_sequence = True
            elif line.startswith('//'):
                # End of record: stop copying so the terminator is not
                # emitted as a blank line and the annotation lines of a
                # following record are not mistaken for sequence.
                in_sequence = False
            elif in_sequence:
                fields = line.split()
                # fields[0] is the position counter; the rest are bases.
                bases = ''.join(fields[1:])
                if bases:
                    fasta.write(bases.upper() + '\n')


if __name__ == '__main__':
    gbk_to_fasta('/mnt/f/biopython/AY810830.gbk',
                 '/mnt/f/biopython/AY810830.fasta')
从多个fasta序列中提取含有'Homo sapiens'的序列
def extract_fasta_records(fasta_path, out_path, keyword='Homo sapiens'):
    """Copy the FASTA records whose header contains ``keyword``.

    A record is a line starting with ``>`` plus every following line up to
    the next ``>`` line. Matching records are written to ``out_path``
    unchanged, in input order. Lines that appear before the first header
    belong to no record and are skipped. An empty input produces an empty
    output file.

    Args:
        fasta_path: Path of the multi-record FASTA file to read.
        out_path: Path of the FASTA file to create or overwrite.
        keyword: Substring looked for in each header line (case-sensitive).

    Raises:
        OSError: If either file cannot be opened.
    """
    keep = False
    # Both handles live in one ``with`` so they are closed even on error.
    with open(fasta_path, 'r') as source, open(out_path, 'w') as target:
        for line in source:
            if line.startswith('>'):
                # A header decides the fate of every line up to the next one.
                keep = keyword in line
            if keep:
                # Stream line by line instead of accumulating each record
                # in memory through repeated string concatenation.
                target.write(line)


if __name__ == '__main__':
    extract_fasta_records('multi.fasta', 'human.fasta')
请关注我的公众号----生信栈,不定期分享实用的生物信息干货!!!
网友评论