import re
import os
import sys
import argparse
import gzip
# Contig names retained by the filter: 1-22, X, Y and MT, written without a
# "chr" prefix.
chrID = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
         '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']


def filter_vcf(ifile, ofile, contigs=chrID):
    """Copy a gzip-compressed VCF to a plain-text VCF, keeping selected contigs.

    Header lines (those starting with '#') are copied unchanged, except that
    ``##contig=<ID=...>`` declarations are kept only when their ID is in
    *contigs*.  Every other line is kept when its first tab-separated column
    is in *contigs* and the line does not contain the text 'RefCall'.

    Args:
        ifile: Path of the gzip-compressed input VCF.
        ofile: Path of the uncompressed output VCF (overwritten if it exists).
        contigs: Collection of contig names to retain; defaults to ``chrID``.
    """
    # ``with`` closes both handles even if a line fails to decode or parse.
    with gzip.open(ifile, 'rb') as vcf_in, \
            open(ofile, 'w', encoding='utf-8') as vcf_out:
        # Iterate the stream instead of readlines() so the whole file is
        # never held in memory at once.
        for raw in vcf_in:
            # Decode once and work on text only: calling str methods on the
            # raw bytes raises TypeError, and str(bytes) yields "b'...'".
            line = raw.decode()
            if line.startswith('#'):
                if 'contig=<ID' in line:
                    # "##contig=<ID=1,length=...>" -> third '='-separated
                    # field is "1,length"; the ID is the text before the
                    # first comma.  rstrip('>') covers a declaration with no
                    # further keys, e.g. "##contig=<ID=1>".
                    id_field = line.strip().split('=')[2]
                    contig = id_field.strip().split(',')[0].rstrip('>')
                    if contig in contigs:
                        vcf_out.write(line)
                else:
                    vcf_out.write(line)
            elif line.strip().split('\t')[0] in contigs and 'RefCall' not in line:
                vcf_out.write(line)


def main():
    """Read -i/--input and -o/--output from the command line and run the filter."""
    parser = argparse.ArgumentParser(description="pipeline")
    parser.add_argument('-i', '--input', help='the pathway of the input vcf file ', required=True)
    parser.add_argument('-o', '--output', help='the pathway of the output vcf file,filter vcf', required=True)
    argv = vars(parser.parse_args())
    ifile = os.path.abspath(argv['input'].strip())
    ofile = os.path.abspath(argv['output'].strip())
    filter_vcf(ifile, ofile)


if __name__ == '__main__':
    main()
# 每一次写脚本都是在细节中不断优化 (Every script gets refined a little further through its details.)
import re
import os
import sys
import argparse
import gzip
# Contig names retained by the filter: 1-22, X, Y and MT, written without a
# "chr" prefix.
chrID = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
         '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']


def filter_vcf_gz(ifile, ofile, contigs=chrID):
    """Copy a gzip-compressed VCF to a new gzip-compressed VCF, keeping selected contigs.

    Header lines (those starting with '#') are copied unchanged, except that
    ``##contig=<ID=...>`` declarations are kept only when their ID is in
    *contigs*.  Every other line is kept when its first tab-separated column
    is in *contigs* and the line does not contain the text 'RefCall'.

    Args:
        ifile: Path of the gzip-compressed input VCF.
        ofile: Path of the gzip-compressed output VCF (overwritten if it exists).
        contigs: Collection of contig names to retain; defaults to ``chrID``.
    """
    # ``with`` closes both handles even if a line fails to decode or parse,
    # so the gzip trailer of the output is always flushed.
    with gzip.open(ifile, 'rb') as vcf_in, gzip.open(ofile, 'wb') as vcf_out:
        # Iterate the stream instead of readlines() so the whole file is
        # never held in memory at once.
        for raw in vcf_in:
            # Inspect a decoded copy, but write the untouched bytes: the
            # output is opened in binary mode and rejects str.
            line = raw.decode()
            if line.startswith('#'):
                if 'contig=<ID' in line:
                    # "##contig=<ID=1,length=...>" -> third '='-separated
                    # field is "1,length"; the ID is the text before the
                    # first comma.  rstrip('>') covers a declaration with no
                    # further keys, e.g. "##contig=<ID=1>".
                    id_field = line.strip().split('=')[2]
                    contig = id_field.strip().split(',')[0].rstrip('>')
                    if contig in contigs:
                        vcf_out.write(raw)
                else:
                    vcf_out.write(raw)
            elif line.strip().split('\t')[0] in contigs and 'RefCall' not in line:
                vcf_out.write(raw)


def main():
    """Read -i/--input and -o/--output from the command line and run the filter."""
    parser = argparse.ArgumentParser(description="pipeline")
    parser.add_argument('-i', '--input', help='the pathway of the input vcf file ', required=True)
    parser.add_argument('-o', '--output', help='the pathway of the output vcf file,filter vcf', required=True)
    argv = vars(parser.parse_args())
    ifile = os.path.abspath(argv['input'].strip())
    ofile = os.path.abspath(argv['output'].strip())
    filter_vcf_gz(ifile, ofile)


if __name__ == '__main__':
    main()
# 网友评论 (Reader comments)