python35

作者: rong酱 | 来源:发表于2022-12-14 09:52 被阅读0次
#!/usr/bin/env python

import re
import os
import sys
import argparse
import gzip

parser = argparse.ArgumentParser(description="pipeline")
parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
parser.add_argument('-b', '--bedfile', help = 'the pathway of the bed file ', required = True)
parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
argv = vars(parser.parse_args())
ifile = os.path.abspath(argv['input'].strip())
ofile = os.path.abspath(argv['output'].strip())
bfile = os.path.abspath(argv['bedfile'].strip())

def bedchr(bfile):
    chrID=[]
    with open(bfile,'r') as b:
        for bl in b:
            bli=bl.strip().split('\t')
            chrb=str(bli[0])
            if chrb not in chrID:
                chrID.append(chrb)
    return chrID
     
# GQ >10, indicates a 90% pro
def filtervcf(inputfile,chrid,ofile): # chrID type: list, include the reference chromode ID
    oc=open(ofile,'w')
    with gzip.open(inputfile,'rb') as v:
        for vi in v:
            vic = vi.strip().split('\t')
            if str(vic[0]).startswith('#'):
                if str(vic[0]).startswith('##contig'):
                    vi1=vi.strip().split('=')[2]
                    vi2=vi1.strip().split(',')[0]
                    if str(vi2) in chrid:
                        oc.write(str(vi))
                else:
                    oc.write(str(vi))
            else:
                samfinfoGQ = int(vic[-1].strip().split(':')[1])
                if str(vic[0]) in chrid and 'RefCall' not in str(vic) and samfinfoGQ >=20:
                    oc.write(str(vi))

if __name__ == "__main__":
    bedchrid=bedchr(bfile)
    filtervcf(ifile,bedchrid,ofile)

优化设置阈值

#!/usr/bin/env python

import re
import os
import sys
import argparse
import gzip

parser = argparse.ArgumentParser(description="pipeline")
parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
parser.add_argument('-b', '--bedfile', help = 'the pathway of the bed file ', required = True)
parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
parser.add_argument('-g', '--GQnumber', help = 'the value of GQ,Conditional genotype quality ', required = True)
argv = vars(parser.parse_args())
ifile = os.path.abspath(argv['input'].strip())
ofile = os.path.abspath(argv['output'].strip())
bfile = os.path.abspath(argv['bedfile'].strip())
gnum=int(argv['GQnumber'].strip())

def bedchr(bfile):
    chrID=[]
    with open(bfile,'r') as b:
        for bl in b:
            bli=bl.strip().split('\t')
            chrb=str(bli[0])
            if chrb not in chrID:
                chrID.append(chrb)
    return chrID
     
# GQ >10, indicates a 90% pro
def filtervcf(inputfile,chrid,ofile,gnum): # chrID type: list, include the reference chromode ID
    oc=open(ofile,'w')
    with gzip.open(inputfile,'rb') as v:
        for vi in v:
            vic = vi.strip().split('\t')
            if str(vic[0]).startswith('#'):
                if str(vic[0]).startswith('##contig'):
                    vi1=vi.strip().split('=')[2]
                    vi2=vi1.strip().split(',')[0]
                    if str(vi2) in chrid:
                        oc.write(str(vi))
                else:
                    oc.write(str(vi))
            else:
                samfinfoGQ = int(vic[-1].strip().split(':')[1])
                if str(vic[0]) in chrid and 'RefCall' not in str(vic) and samfinfoGQ >=int(gnum):
                    oc.write(str(vi))

if __name__ == "__main__":
    bedchrid=bedchr(bfile)
    filtervcf(ifile,bedchrid,ofile,gnum)

相关文章

网友评论

      本文标题:python35

      本文链接:https://www.haomeiwen.com/subject/rmgkqdtx.html