pysam

作者: rong酱 | 来源:发表于2021-05-26 13:53 被阅读0次
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    unsplit_file = "/unsplit.bam"
    out_dir = "/t11/b2/"

    import pysam
    import sys
    import os
    import re


    # Split a BAM file into numbered output files, one per run of consecutive
    # reads that share the same CB tag value. Reads without a CB tag are skipped.
    # NOTE(review): this presumably expects the input to be grouped/sorted by CB;
    # if the same CB value reappears later it simply starts another numbered
    # file -- confirm against how the input BAM was produced.
    itr = 0              # number of output files opened so far (used in the file name)
    CB_hold = 'unset'    # CB value of the run currently being written
    split_file = None    # writer for the current run; None until the first tagged read

    samfile = pysam.AlignmentFile(unsplit_file, "rb")

    for read in samfile.fetch(until_eof=True):
        # Ask the record for the tag instead of searching its text form:
        # the letters "CB" can also occur in a read name or quality string.
        if not read.has_tag('CB'):
            continue
        CB_itr = read.get_tag('CB')
        if CB_itr != CB_hold or itr == 0:
            # The tag value changed: finish the previous output file (without
            # re-opening it, which would truncate it) and start the next one.
            if split_file is not None:
                split_file.close()
            CB_hold = CB_itr
            itr = itr + 1
            split_file = pysam.AlignmentFile('%s/CB_%s.bam'%(out_dir,itr), "wb", template=samfile)
        # Every tagged read goes to the current file, not only the first of a run.
        split_file.write(read)

    # split_file stays None when no read carried a CB tag.
    if split_file is not None:
        split_file.close()
    samfile.close()
    

    优化

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    import pysam
    import sys
    import os
    import re
    import threading
    # Command-line arguments: the input BAM path first, the output directory second.
    unsplit_file, out_dir = sys.argv[1], sys.argv[2]
    def run(unsplit_file,out_dir):
        """Split a BAM file into one BAM file per CB tag value.

        Every read that carries a CB tag is written to
        ``<out_dir>/CB_<tag value>.bam``; reads without the tag are skipped.
        Each output file is created with the input file as header template.

        Args:
            unsplit_file: Path of the input BAM file.
            out_dir: Directory that receives the per-tag BAM files.

        Returns:
            None.
        """
        # One writer per CB value, opened on first use. Opening a fresh "wb"
        # writer for every read would truncate the file each time and keep only
        # the last read of each tag value.
        # NOTE(review): one file handle stays open per distinct CB value until
        # the end; with very many values this may hit the OS open-file limit.
        writers = {}
        samfile = pysam.AlignmentFile(unsplit_file, "rb")
        try:
            for read in samfile.fetch(until_eof=True):
                # Ask the record for the tag instead of searching its text
                # form: "CB" can also occur in a read name or quality string.
                if not read.has_tag('CB'):
                    continue
                CB_itr = read.get_tag('CB')
                split_file = writers.get(CB_itr)
                if split_file is None:
                    split_file = pysam.AlignmentFile('%s/CB_%s.bam'%(out_dir,CB_itr), "wb", template=samfile)
                    writers[CB_itr] = split_file
                split_file.write(read)
        finally:
            # Close every output so buffered data is flushed, even after an error.
            for split_file in writers.values():
                split_file.close()
            samfile.close()
        return
    if __name__ == '__main__':
        # run() processes the whole input file by itself, so call it exactly once.
        # The earlier form `threading.Thread(target=run(...))` executed run() right
        # away in the main thread and passed its return value (None) as the thread
        # target, so the full split was repeated ten times in sequence and the
        # started threads did nothing.
        run(unsplit_file, out_dir)
    

    生物信息学个人理解:基础应该是统计学 + 计算机科学

    相关文章

      网友评论

          本文标题:pysam

          本文链接:https://www.haomeiwen.com/subject/mbsksltx.html