import re, sys, os
# Build a header -> sequence dictionary from the FASTA file.
# Each key is a full header line (leading ">" included, surrounding whitespace
# stripped); each value is the list of stripped sequence lines that follow that
# header, in file order. A repeated header restarts its list, so the last
# record with a given header wins.
with open("protein_rm_enter.faa") as f:
    Dict = {}
    keys = []  # headers in file order (duplicates included); lookups use Dict
    key = None  # no header seen yet
    for line in f:
        if line.startswith(">"):
            key = line.strip()
            keys.append(key)
            Dict[key] = []
        else:
            if key is None:
                # Nothing to attach this line to yet: tolerate leading blank
                # lines, but fail clearly on real data instead of hitting an
                # unbound name.
                if line.strip():
                    raise ValueError(
                        "sequence data found before the first '>' header"
                    )
                continue
            Dict[key].append(line.strip())

# Walk the target names and write every matching sequence.
# Each line of apd.target is turned into the header ">{name}_1"; when that
# header exists in the FASTA dictionary, one "header<TAB>sequence" line is
# written, with the sequence lines joined into a single string.
with open("protein2.target", "w") as o, open("apd.target") as target:
    for tar in target:
        tar = ">{}_1".format(tar.strip())
        # Dict has the same keys as the `keys` list, but membership is a hash
        # lookup rather than a linear scan per target.
        if tar in Dict:
            o.write("{}\t{}\n".format(tar, "".join(Dict[tar])))
# "User comments" - leftover web-page heading, not part of the script.