#!/usr/bin/env python
##coding=utf-8
import re
import os
import sys
import argparse
import gzip
# Command-line interface. All three options are required; parsing happens at
# module level, so the script exits with a usage message if any is missing.
parser = argparse.ArgumentParser(
    description="Extract the reads listed in an ID file from a gzipped "
                "FASTQ file and write them as FASTA")
parser.add_argument('-i', '--input',
                    help='path to the gzip-compressed input FASTQ file',
                    required=True)
parser.add_argument('-id', '--input_id',
                    help='path to the input ID file (one read ID per line)',
                    required=True)
parser.add_argument('-o', '--output',
                    help='path to the output FASTA file',
                    required=True)
argv = vars(parser.parse_args())

# Strip stray whitespace from the supplied values and resolve them to
# absolute paths.
ifile = os.path.abspath(argv['input'].strip())
ofile = os.path.abspath(argv['output'].strip())
idf = os.path.abspath(argv['input_id'].strip())
def creatlist(inputfile):
    """Read read IDs from a text file, one ID per line.

    Args:
        inputfile: Path to a plain-text file holding one read ID per line.

    Returns:
        A list of ID strings in file order, with surrounding whitespace
        removed. Blank lines are skipped.
    """
    # Open in text mode: with 'rb' each line is a bytes object on Python 3,
    # so splitting on a str separator raises TypeError and str() of the
    # value would produce "b'...'" instead of the ID itself.
    with open(inputfile, 'r') as handle:
        stripped = (line.strip() for line in handle)
        # Drop empty entries so a blank line never becomes an empty ID.
        return [entry for entry in stripped if entry]
def trimfq(inputfile, outfile, idList):
    """Write the sequences of selected reads from a gzipped FASTQ as FASTA.

    Any line starting with '@' is treated as a read header, and its ID is
    the rest of that line after the leading '@'. When the ID is in
    ``idList``, the line immediately following the header is written to
    ``outfile`` as a two-line record: ">ID" followed by the sequence.
    Each matched ID is also printed to stdout.

    Args:
        inputfile: Path to the gzip-compressed FASTQ file.
        outfile: Path of the output file; it is created or overwritten.
        idList: Iterable of read IDs to keep. An ID must equal the whole
            header line minus its leading '@'.
    """
    # Build the lookup once so each header costs a hash probe instead of a
    # scan through the whole ID list.
    wanted = set(idList)

    # 'rt' yields str lines; in binary mode the str-based startswith/split
    # calls fail on Python 3. The with-statement closes both files even on
    # error (the original ``oc.close`` lacked parentheses and never ran).
    with gzip.open(inputfile, 'rt') as reads, open(outfile, 'w') as out:
        pending_id = None  # ID of a matched header seen on the previous line
        for raw in reads:
            line = raw.strip()
            if line.startswith('@'):
                # Remove only the leading marker; IDs may contain '@'.
                read_id = line[1:]
                if read_id in wanted:
                    print("vcid : " + str(read_id))
                    pending_id = read_id
                else:
                    pending_id = None
            elif pending_id is not None:
                # Only the line directly after a matched header is the
                # sequence; write it and stop waiting.
                out.write(">" + pending_id + "\n" + line + "\n")
                pending_id = None
# Script entry: load the requested read IDs from the ID file, then pass them
# with the input and output paths to trimfq.
idL = creatlist(idf)
trimfq(ifile, ofile, idL)
# "User comments" -- appears to be leftover web-page text, not part of the script.