#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""
@FileName: 20220220测试
@Time: 2022/2/21,12:00
@NAME: zhang yixing
"""
import argparse
def _read_fasta(filename):
    """Parse a FASTA file into a ``{header_name: sequence}`` dict.

    ``header_name`` is the first whitespace-delimited token of a header
    line, *including* the leading ``>``. Sequence lines are stripped of
    trailing whitespace and concatenated. Lines that appear before the
    first header are ignored instead of raising. When a header name occurs
    more than once, the last record replaces the earlier one.
    """
    chunks = {}
    current = None
    with open(filename, 'r') as f:
        for line in f:
            if line.startswith('>'):
                current = line.split()[0]
                chunks[current] = []
            elif current is not None:
                # Collect pieces and join once at the end; avoids repeated
                # string concatenation on long sequences.
                chunks[current].append(line.rstrip())
    return {name: ''.join(parts) for name, parts in chunks.items()}


def get_gen(filename, id=None, genid_txt=None):
    """Extract sequences from a FASTA file and write them to ``./gen.txt``.

    The mode is selected by ``genid_txt``:

    * ``genid_txt is None`` -- look up the single name ``id``. When found,
      ``./gen.txt`` receives the name on the first line and the sequence on
      the second (no trailing newline). When not found, ``no found`` is
      printed and no file is written.
    * otherwise -- ``genid_txt`` is the path of a text file holding one
      name per line (blank lines are skipped). For every distinct name, in
      order of first appearance, ``./gen.txt`` receives the name on one
      line followed by its sequence, or by the literal text ``no found``
      when the FASTA file has no such record.

    Names must match the FASTA header token exactly, including the leading
    ``>`` (e.g. ``'>HHKEBNDL_00001'``).

    Args:
        filename: Path of the FASTA file to read.
        id: Header name to extract in single-lookup mode.
        genid_txt: Path of the name-list file for batch mode, or ``None``.

    Returns:
        None.
    """
    id_gen_dict = _read_fasta(filename)

    if genid_txt is None:
        if id not in id_gen_dict:
            print('no found')
            return
        with open('./gen.txt', 'w') as g:
            g.write(id + '\n' + id_gen_dict[id])
        return

    # A dict de-duplicates repeated names while keeping first-seen order.
    res_dict = {}
    with open(genid_txt, 'r') as id_txt:
        for line in id_txt:
            wanted = line.strip()
            if not wanted:
                # A blank line is not a name; do not emit a record for it.
                continue
            res_dict[wanted] = id_gen_dict.get(wanted, 'no found')

    with open('./gen.txt', 'w') as g:
        g.writelines(
            key + '\n' + value + '\n' for key, value in res_dict.items()
        )
def main():
    """Command-line entry point: parse the options and run ``get_gen``.

    Options:
        -i/--input      FASTA file to read (required).
        -g/--genid      A single gene ID to extract.
        -t/--genid_txt  Text file listing a batch of gene IDs to extract.

    When both ``-g`` and ``-t`` are given, ``-g`` takes precedence. When
    neither is given, the parser reports an error and exits with status 2
    rather than silently doing nothing.
    """
    parser = argparse.ArgumentParser(usage='程序用法非常简单,出现问题可以评论我', description='从fasta格式中根据基因名字提取基因序列并且生成gen.txt文件')
    # Input FASTA file; required because get_gen cannot open a missing path.
    parser.add_argument("-i", "--input", required=True, help="input filename")
    # Single gene ID.
    parser.add_argument("-g", "--genid", help="gen id")
    # Text file holding the batch of gene IDs to extract from the FASTA file.
    parser.add_argument("-t", "--genid_txt", help="a lot of id in genid.txt")
    args = parser.parse_args()

    if args.genid:
        get_gen(args.input, args.genid)
        print(args.input, args.genid)
    elif args.genid_txt:
        get_gen(args.input, None, genid_txt=args.genid_txt)
        print(args.input, args.genid_txt)
    else:
        parser.error('one of -g/--genid or -t/--genid_txt is required')
if __name__ == "__main__":
    # Run the CLI only when this file is executed as a script, not on import.
    main()
![](https://img.haomeiwen.com/i27226870/76a0056535668987.png)
用法1
python get_gen.py -i cand.cds -g '>HHKEBNDL_00001'
![](https://img.haomeiwen.com/i27226870/81fab3e49b657e13.png)
用法2
python get_gen.py -i cand.fa -t genid.txt
以上两种,结果都储存在gen.txt中
![](https://img.haomeiwen.com/i27226870/d1e9b972a3a2bef6.png)
对于没找到的基因序列,在基因名字下面用 no found 代替
网友评论