#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Path of the input BAM file; it is opened for reading with
# pysam.AlignmentFile further down in this script.
unsplit_file = "/unsplit.bam"
# Directory prefix under which the numbered CB_<n>.bam output files are
# created.
out_dir = "/t11/b2/"
import pysam
import sys
import os
import re
# Split the input BAM into numbered output BAMs, one per run of consecutive
# reads that share the same CB tag value. Files are named CB_1.bam,
# CB_2.bam, ... in the order the runs are encountered.
# NOTE(review): this gives one file per barcode only if the input is already
# grouped/sorted by the CB tag -- confirm how the input BAM was produced.
itr = 0
CB_hold = 'unset'
split_file = None  # writer for the barcode run currently being emitted
samfile = pysam.AlignmentFile(unsplit_file, "rb")
try:
    for read in samfile.fetch(until_eof=True):
        # Skip reads without a CB tag. has_tag() inspects the tag itself; a
        # substring test on str(read) can also match other fields (e.g. the
        # read name), after which get_tag() would raise KeyError.
        if not read.has_tag('CB'):
            continue
        print("read:" + str(read))
        CB_itr = read.get_tag('CB')
        print("CB_itr : " + str(CB_itr))
        if CB_itr != CB_hold or itr == 0:
            # A new barcode run starts: close the finished writer. Do NOT
            # re-open its path in "wb" mode -- that would truncate the reads
            # that were just written to it.
            if split_file is not None:
                split_file.close()
            CB_hold = CB_itr
            itr = itr + 1
            split_file = pysam.AlignmentFile(
                '%s/CB_%s.bam' % (out_dir, itr), "wb", template=samfile)
        split_file.write(read)
finally:
    # split_file stays None when no read carried a CB tag, so guard the
    # close instead of failing on an undefined/None writer.
    if split_file is not None:
        split_file.close()
    samfile.close()
优化后的版本:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pysam
import sys
import os
import re
import threading
# Command-line arguments: the input BAM path comes first, the output
# directory second.
unsplit_file, out_dir = sys.argv[1], sys.argv[2]
def run(unsplit_file, out_dir):
    """Split a BAM file into one BAM file per CB tag value.

    Every read that carries a CB tag is written to
    ``<out_dir>/CB_<tag value>.bam``; reads without the tag are skipped.
    Output files reuse the header of the input file.

    One writer is kept open per distinct CB value until the whole input has
    been read, so the reads of a barcode do not need to be contiguous.
    Inputs with more distinct barcodes than the process may hold open files
    will fail with an OS error; sort/partition the input first in that case.

    Args:
        unsplit_file: Path of the input BAM file.
        out_dir: Existing directory that receives the per-barcode BAM files.

    Returns:
        None.
    """
    writers = {}  # CB value -> open output AlignmentFile
    samfile = pysam.AlignmentFile(unsplit_file, "rb")
    try:
        for read in samfile.fetch(until_eof=True):
            # Skip reads without a CB tag; has_tag() checks the tag itself
            # rather than searching the read's string form.
            if not read.has_tag('CB'):
                continue
            CB_itr = read.get_tag('CB')
            split_file = writers.get(CB_itr)
            if split_file is None:
                # Open each output exactly once: re-opening in "wb" mode for
                # every read would truncate the file and keep only the last
                # read of each barcode.
                split_file = pysam.AlignmentFile(
                    '%s/CB_%s.bam' % (out_dir, CB_itr), "wb", template=samfile)
                writers[CB_itr] = split_file
            split_file.write(read)
    finally:
        # Close every writer so buffered records are flushed, even on error.
        for split_file in writers.values():
            split_file.close()
        samfile.close()
    return
if __name__ == '__main__':
    # Run the split exactly once. The earlier Thread(target=run(...)) form
    # called run() immediately in the main thread and handed its None result
    # to Thread, so the loop merely repeated the same full pass ten times.
    # Truly concurrent runs would also race on the same output files.
    run(unsplit_file, out_dir)
生物信息学个人理解:基础应该是统计学 + 计算机科学
网友评论