python29

作者: rong酱 | 来源:发表于2022-04-26 17:47 被阅读0次
    # -*- coding: utf-8 -*-
    
    import os
    import sys
    import re
    from Bio import SeqIO
    from Bio.Seq import Seq
    
    in1=sys.argv[1]
    out1=sys.argv[2]
    out2=sys.argv[3]
    
    ouc=open(out1,'w')
    
    virus_dict={}
    with open('.virus_nameid','r') as v:
        vi=v.readlines()
        for vin in vi:
            vinc=vin.strip().split(" ")
            vinkey=str(vinc[0])
            vinvalue=vinc[1:]
            vincon="_".join(vinvalue)
            virus_dict[vinkey]=str(vincon)
    print(str(virus_dict.keys()))
    print("\n")
    print(str(virus_dict))
    
    with open(in1,'r') as i:
        li=i.readlines()
        for lin in li:
            linc=lin.strip().split("\t")
    #       pos1=str(linc[3])
    #       pos2=str(linc[7])
            matchcon=str(linc[5])
            posID=str(linc[0])
            if str(matchcon) == "150M":
                continue
            else:
                if "S" not in matchcon:
                    if "D" in matchcon:
                        conpos=re.split('M|D',matchcon)
                        poslen=0
                        for conposi in conpos:
                            print(str(conposi))
                            if conposi.isdigit():
                                poslen=poslen+int(conposi)
                            elif "I" in str(conposi):
                                conpos1=re.split('I',matchcon)
                                for conpos1i in conpos1:
                                    if conpos1i.isdigit():
                                        continue
                                    else:
                                        con2=re.split('M|D',conpos1i)
                                        for con2i in con2:
                                            if con2i.isdigit():
                                                poslen=poslen+int(con2i)
                    else:
                        poslen=150
                else:
                    conpos=re.split('S',matchcon)
                    con1=conpos[0]
                    if con1.isdigit():
                        poslen=0
                    else:
                        con2=re.split('M|D',str(con1))
                        poslen=0
                        for conposi in con2:
                            if conposi.isdigit():
                                poslen=poslen+int(conposi)
                            elif "I" in str(conposi):
                                conpos1=re.split('I',matchcon)[1]
                                con2=re.split('M|D',conpos1)
                                for con2i in con2:
                                    if con2i.isdigit():
                                        poslen=poslen+int(con2i)
                pos1id=linc[2]
                pos1pos=linc[3]
                pos2id=linc[6]
                pos2pos=linc[7]
    #           print(str(pos2))
                if "chr" in str(pos1id):
                    if "virus" in str(pos2id):
    #                   posinsert=int(linc[4])
    #                   print(str(posinsert))
                        posinsert=int(pos1pos)+int(poslen)
                        virusid=str(pos2id)
                        print("virusid : "+str(virusid))
                        virusname=virus_dict[virusid]
                        #ouc.write(str(linc[0])+"\t"+str(linc[2])+"\t"+str(posinsert)+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
                        ouc.write(str(pos1id)+"\t"+str(posinsert)+"\t"+str(linc[0])+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
                elif "chr" in str(pos2id):
                    if "virus" in str(pos1id) :
                        posinsert=int(pos2pos)
                        posinsert=posinsert+int(poslen)
                        virusid=str(pos1id)
                        print("virusid : "+str(virusid))
                        virusname=virus_dict[virusid]
    #                   ouc.write(str(linc[0])+"\t"+str(linc[6])+"\t"+str(posinsert)+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
                        ouc.write(str(pos2id)+"\t"+str(posinsert)+"\t"+str(linc[0])+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
    
    ouc.close()
    
    seqdict={}
    for seqi in SeqIO.parse('./homo.fa',"fasta"):
        seqid=seqi.id
        seqseq=seqi.seq
        seqdict[seqid]=seqseq
    
    pos_list=[]
    ou2=open(out2,'w')
    count=0
    with open(out1,'r') as i:
        li=i.readlines()
        for lin in li:
            linc=lin.strip().split("\t")
            print(str(linc))
            count=count+1
            poschr=linc[0]
            pospos=linc[1]
            pos_list_con=str(poschr)+"_"+str(pospos)
            if pos_list_con not in pos_list:
                pos_list.append(pos_list_con)
                basepos=seqdict[poschr][int(pospos)]
                ou2.write(str(linc[0])+"\t"+str(linc[1])+"\tINV0000000"+str(count)+"\t"+str(basepos)+"\t<INV>\t1\tPASS\tPRECISE;SVTYPE=INV;SVMETHOD=EMBL.DELLYv0.8.7;""\n")
    
    
    ou2.close()
    

    相关文章

      网友评论

          本文标题:python29

          本文链接:https://www.haomeiwen.com/subject/uhlhyrtx.html