python29

作者: rong酱 | 来源:发表于2022-04-26 17:47 被阅读0次
# -*- coding: utf-8 -*-

import os
import sys
import re
from Bio import SeqIO
from Bio.Seq import Seq

in1=sys.argv[1]
out1=sys.argv[2]
out2=sys.argv[3]

ouc=open(out1,'w')

virus_dict={}
with open('.virus_nameid','r') as v:
    vi=v.readlines()
    for vin in vi:
        vinc=vin.strip().split(" ")
        vinkey=str(vinc[0])
        vinvalue=vinc[1:]
        vincon="_".join(vinvalue)
        virus_dict[vinkey]=str(vincon)
print(str(virus_dict.keys()))
print("\n")
print(str(virus_dict))

with open(in1,'r') as i:
    li=i.readlines()
    for lin in li:
        linc=lin.strip().split("\t")
#       pos1=str(linc[3])
#       pos2=str(linc[7])
        matchcon=str(linc[5])
        posID=str(linc[0])
        if str(matchcon) == "150M":
            continue
        else:
            if "S" not in matchcon:
                if "D" in matchcon:
                    conpos=re.split('M|D',matchcon)
                    poslen=0
                    for conposi in conpos:
                        print(str(conposi))
                        if conposi.isdigit():
                            poslen=poslen+int(conposi)
                        elif "I" in str(conposi):
                            conpos1=re.split('I',matchcon)
                            for conpos1i in conpos1:
                                if conpos1i.isdigit():
                                    continue
                                else:
                                    con2=re.split('M|D',conpos1i)
                                    for con2i in con2:
                                        if con2i.isdigit():
                                            poslen=poslen+int(con2i)
                else:
                    poslen=150
            else:
                conpos=re.split('S',matchcon)
                con1=conpos[0]
                if con1.isdigit():
                    poslen=0
                else:
                    con2=re.split('M|D',str(con1))
                    poslen=0
                    for conposi in con2:
                        if conposi.isdigit():
                            poslen=poslen+int(conposi)
                        elif "I" in str(conposi):
                            conpos1=re.split('I',matchcon)[1]
                            con2=re.split('M|D',conpos1)
                            for con2i in con2:
                                if con2i.isdigit():
                                    poslen=poslen+int(con2i)
            pos1id=linc[2]
            pos1pos=linc[3]
            pos2id=linc[6]
            pos2pos=linc[7]
#           print(str(pos2))
            if "chr" in str(pos1id):
                if "virus" in str(pos2id):
#                   posinsert=int(linc[4])
#                   print(str(posinsert))
                    posinsert=int(pos1pos)+int(poslen)
                    virusid=str(pos2id)
                    print("virusid : "+str(virusid))
                    virusname=virus_dict[virusid]
                    #ouc.write(str(linc[0])+"\t"+str(linc[2])+"\t"+str(posinsert)+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
                    ouc.write(str(pos1id)+"\t"+str(posinsert)+"\t"+str(linc[0])+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
            elif "chr" in str(pos2id):
                if "virus" in str(pos1id) :
                    posinsert=int(pos2pos)
                    posinsert=posinsert+int(poslen)
                    virusid=str(pos1id)
                    print("virusid : "+str(virusid))
                    virusname=virus_dict[virusid]
#                   ouc.write(str(linc[0])+"\t"+str(linc[6])+"\t"+str(posinsert)+"\t"+str(virusid)+"\t"+str(virusname)+"\n")
                    ouc.write(str(pos2id)+"\t"+str(posinsert)+"\t"+str(linc[0])+"\t"+str(virusid)+"\t"+str(virusname)+"\n")

ouc.close()

seqdict={}
for seqi in SeqIO.parse('./homo.fa',"fasta"):
    seqid=seqi.id
    seqseq=seqi.seq
    seqdict[seqid]=seqseq

pos_list=[]
ou2=open(out2,'w')
count=0
with open(out1,'r') as i:
    li=i.readlines()
    for lin in li:
        linc=lin.strip().split("\t")
        print(str(linc))
        count=count+1
        poschr=linc[0]
        pospos=linc[1]
        pos_list_con=str(poschr)+"_"+str(pospos)
        if pos_list_con not in pos_list:
            pos_list.append(pos_list_con)
            basepos=seqdict[poschr][int(pospos)]
            ou2.write(str(linc[0])+"\t"+str(linc[1])+"\tINV0000000"+str(count)+"\t"+str(basepos)+"\t<INV>\t1\tPASS\tPRECISE;SVTYPE=INV;SVMETHOD=EMBL.DELLYv0.8.7;""\n")


ou2.close()

相关文章

  • python29

  • 初入Python29/100

    我没做了,我的电脑就3.6没2.7 在线电脑我打不开。 困了好久。没有动过。好吧。做起来。 没有复盘,都不知道要做...

  • 2018-10-21 Python29 ==和is

    == 是用来判断值是否相等 is 是判断是否指向了同一个东西 有一小部分数字存在了常量区,变量要引用这些值时,...

  • 小甲鱼python29课n-m行文本读取

    def recep(reception):'''reception:接收scope:范围:param recept...

网友评论

      本文标题:python29

      本文链接:https://www.haomeiwen.com/subject/uhlhyrtx.html