import time
from Bio import SeqIO
# Method 1: hand-written FASTA parser (original note: takes 10+ seconds).
# Reads the whole file into a dict mapping each header line (including the
# leading '>') to its concatenated sequence, then looks up a single base.
t_start = time.time()
d_fasta = {}
with open(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta') as f:
    header = None
    chunks = []
    # Iterate over the file line by line; iterating over f.readline() would
    # walk the characters of the first line only.
    for line in f:
        line = line.strip()
        if line.startswith('>'):
            # A new record starts: flush the previous one, if any.
            if header is not None:
                d_fasta[header] = ''.join(chunks)
            header = line
            chunks = []
        elif header is not None:
            # Collect pieces and join once per record instead of repeated
            # string concatenation. Lines before the first header are ignored.
            chunks.append(line)
    if header is not None:
        d_fasta[header] = ''.join(chunks)
# Python indexing is 0-based, so [500000] is the 500,001st base when counted
# from 1.
print(d_fasta['>Chr01A'][500000])
t_end = time.time()
# The timer uses its own names so the parsed header can no longer overwrite
# the start timestamp.
print(f'简单重复写法取基因组中第500000个碱基花费时间{t_end - t_start}')
# Method 2: Biopython SeqIO (original measurement: 1.2899494171142578 s).
# Parses every record into a dict keyed by record id, then indexes one base.
a = time.time()
chr_dict = {}
for seq_record in SeqIO.parse(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta', "fasta"):
    chr_dict[str(seq_record.id)] = seq_record.seq
# SeqIO record ids do not include the leading '>' of the FASTA header, so the
# lookup key is 'Chr01A' ('>Chr01A' raises KeyError).
# Indexing is 0-based: [500000] is the 500,001st base when counted from 1.
print(chr_dict['Chr01A'][500000])
b = time.time()
print(b - a)
# Method 3: pyfaidx indexed access (original measurement: 0.010302305221557617 s).
# Fasta() uses a .fai index, so only the requested slice is read from disk.
from pyfaidx import Fasta
a = time.time()
genes = Fasta(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta')
# Slice exactly one base. The previous end bound of 5000001 was a typo that
# pulled ~4.5 Mbp instead of a single base.
# pyfaidx slices follow Python's 0-based, end-exclusive convention, so this is
# the same position as index [500000] on a plain string.
base1 = genes['Chr01A'][500000:500001].seq
print(base1)
b = time.time()
print(b - a)
# Method 4: samtools faidx through the shell (original measurement: 0.7264723777770996 s).
import os
fasta_path = '/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta'
a = time.time()
# First call builds the .fai index next to the FASTA file.
os.system(f'samtools faidx {fasta_path}')
# Query the same file that was just indexed. The bare relative name
# 'Chr01A.fasta' only resolved when the working directory happened to be the
# FASTA's folder.
# NOTE: samtools region coordinates are 1-based and inclusive, so
# 500000-500000 is the 500,000th base.
os.system(f'samtools faidx {fasta_path} Chr01A:500000-500000')
b = time.time()
print(b - a)
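# 网友评论 ("Reader comments") -- apparently web-page residue from where this script was copied; not part of the program.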