美文网首页
2022-03-10

2022-03-10

作者: 球果假水晶蓝 | 来源:发表于2022-03-10 19:21 被阅读0次
    import time
    from Bio import SeqIO
    # Method 1: hand-written FASTA parser (takes 10+ seconds on this genome).
    # Builds d_fasta, mapping each header line (including the leading '>')
    # to its full sequence as a single string.
    a = time.time()
    seq_parts = {}
    header = None
    with open(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta') as f:
        # Iterate over the lines of the file; f.readline() would only yield
        # the characters of the first line.
        for line in f:
            line = line.strip()
            if line.startswith('>'):
                # Keep the header in its own variable so the start
                # timestamp in `a` is not overwritten.
                header = line
                seq_parts[header] = []
            elif header is not None:
                # Collect chunks and join once at the end instead of
                # repeatedly extending a very large string.
                seq_parts[header].append(line)
    d_fasta = {name: ''.join(parts) for name, parts in seq_parts.items()}

    # Index 500000 is 0-based.
    print(d_fasta['>Chr01A'][500000])
    b = time.time()
    print(f'简单重复写法取基因组中第500000个碱基花费时间{b - a}')
    
    # Method 2: parse with Biopython's SeqIO (measured at about 1.29 s).
    # Builds chr_dict, mapping each record id to its sequence object.
    a = time.time()
    chr_dict = {}
    for seq_record in SeqIO.parse(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta', "fasta"):
        chr_dict[str(seq_record.id)] = seq_record.seq
    # Record ids do not include the leading '>' of the FASTA header line,
    # so the lookup key is 'Chr01A', not '>Chr01A'.
    print(chr_dict['Chr01A'][500000])
    b = time.time()
    print(b - a)
    
    # Method 3: indexed random access with pyfaidx (measured at about 0.01 s).
    from pyfaidx import Fasta
    a = time.time()
    genes = Fasta(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta')
    # Slice exactly one base at 0-based index 500000; the previous end
    # bound 5000001 had an extra digit and fetched about 4.5 Mbp instead.
    base1 = genes['Chr01A'][500000:500001].seq
    print(base1)
    b = time.time()
    print(b - a)
    
    # Method 4: call the external `samtools faidx` tool (measured at about 0.73 s).
    import os
    import subprocess
    a = time.time()
    fasta_path = '/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta'
    # First call builds the .fai index next to the FASTA file.
    # Argument lists avoid the shell, and check=True raises if samtools
    # fails, so a failed run is not reported as a timing.
    subprocess.run(['samtools', 'faidx', fasta_path], check=True)
    # Query the same absolute path that was indexed, so the call does not
    # depend on the current working directory.
    # samtools regions are 1-based and inclusive, so 500001-500001 is the
    # same base as 0-based Python index 500000.
    subprocess.run(['samtools', 'faidx', fasta_path, 'Chr01A:500001-500001'], check=True)
    b = time.time()
    print(b - a)
    
    
    

    相关文章

      网友评论

          本文标题:2022-03-10

          本文链接:https://www.haomeiwen.com/subject/bphqdrtx.html