# -*- coding: utf-8 -*-
import os
import sys
import re
# Column indices 0-7 of every tab-split line are read below, so a line with
# fewer columns cannot be processed.
MIN_COLUMNS = 8


def junction_rows(fields):
    """Build the output rows for one tab-split input line.

    Columns used (0-based): 0 = record ID, 2/3 = first reference name and
    position, 5 = match string, 6/7 = second reference name and position.
    NOTE(review): this looks like the SAM column layout (QNAME, RNAME, POS,
    CIGAR, RNEXT, PNEXT) -- confirm against whatever produces the input.

    A row is only produced when one reference name contains "chr", the other
    contains "virus", and the match string does not contain "150M".  The
    reported reference/position pair is always the "chr" side.

    Args:
        fields: list of at least MIN_COLUMNS column strings.

    Returns:
        A list of zero or more "ID<TAB>reference<TAB>position\\n" strings.

    Raises:
        ValueError: if the position column that is needed is not an integer.
    """
    record_id = fields[0]
    match_string = fields[5]
    if "150M" in match_string:
        return []

    if "chr" in fields[2] and "virus" in fields[6]:
        chrom, start = fields[2], fields[3]
    elif "chr" in fields[6] and "virus" in fields[2]:
        chrom, start = fields[6], fields[7]
    else:
        return []

    rows = []

    # Match string starts with "<digits>M": report start + that leading length.
    leading_match = match_string.strip().split("M")[0]
    if leading_match.isdigit():
        position = int(start) + int(leading_match)
        rows.append("\t".join((record_id, chrom, str(position))) + "\n")

    # Match string starts with "<digits>S": report the start position itself.
    # This is checked independently of the test above, as in the original
    # script, so a purely numeric match string yields both rows.
    leading_clip = match_string.strip().split("S")[0]
    if leading_clip.isdigit():
        rows.append("\t".join((record_id, chrom, str(int(start)))) + "\n")

    return rows


def junction_lines(lines):
    """Yield the output rows for an iterable of raw input lines.

    Lines with fewer than MIN_COLUMNS tab-separated columns (for example
    blank lines) are skipped instead of raising IndexError.
    """
    for line in lines:
        fields = line.strip().split("\t")
        if len(fields) < MIN_COLUMNS:
            continue
        for row in junction_rows(fields):
            yield row


def write_junctions(in_path, out_path):
    """Read in_path line by line and write the junction rows to out_path."""
    # The output is opened first, as in the original script; both handles are
    # closed on exit, including when a malformed line raises.
    with open(out_path, 'w') as dst, open(in_path, 'r') as src:
        dst.writelines(junction_lines(src))


if __name__ == "__main__":
    in1 = sys.argv[1]
    out1 = sys.argv[2]
    write_junctions(in1, out1)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import re
from Bio import SeqIO
from Bio.Seq import Seq
# Reference FASTA read by the script; the path is relative to the working
# directory, as in the original.
REFERENCE_FASTA = './homo.fa'


def load_reference(fasta_path=REFERENCE_FASTA):
    """Return a dict mapping each FASTA record id to its sequence."""
    return {record.id: record.seq for record in SeqIO.parse(fasta_path, "fasta")}


def inversion_rows(lines, genome):
    """Yield one tab-separated <INV> row per usable input line.

    Each input line is tab-split; column 1 is used as the sequence name and
    column 2 as the position.  Lines with fewer than three columns (for
    example blank lines) are skipped and do not consume an ID.

    Args:
        lines: iterable of raw input lines.
        genome: mapping from sequence name to an indexable sequence.

    Yields:
        "name<TAB>position<TAB>ID<TAB>base<TAB><INV>..." strings ending in a
        newline.

    Raises:
        KeyError: if a sequence name is not present in genome.
        ValueError: if the position column is not an integer.
        IndexError: if the position is past the end of the sequence.
    """
    count = 0
    for line in lines:
        fields = line.strip().split("\t")
        if len(fields) < 3:
            continue
        count += 1
        chrom, pos = fields[1], fields[2]
        # NOTE(review): the position is used directly as a 0-based index.  If
        # the input positions are 1-based this picks the base one past the
        # stated position -- confirm the intended coordinate convention.
        base = genome[chrom][int(pos)]
        # The ID is zero-padded to a fixed eight digits.  The original
        # prefixed a constant run of zeros, so IDs became one character
        # longer at every power of ten.
        variant_id = "INV%08d" % count
        yield "\t".join((
            chrom,
            pos,
            variant_id,
            str(base),
            "<INV>\t1\tPASS\tPRECISE;SVTYPE=INV;SVMETHOD=EMBL.DELLYv0.8.7;",
        )) + "\n"


def write_inversion_rows(in_path, out_path, fasta_path=REFERENCE_FASTA):
    """Load the reference, then convert in_path into <INV> rows at out_path."""
    genome = load_reference(fasta_path)
    # Both handles are closed on exit, including when a lookup raises.
    with open(out_path, 'w') as dst, open(in_path, 'r') as src:
        dst.writelines(inversion_rows(src, genome))


if __name__ == "__main__":
    in1 = sys.argv[1]
    ou1 = sys.argv[2]
    write_inversion_rows(in1, ou1)
# "网友评论" ("Reader comments"): leftover web-page text, not part of the script.