需要优化
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
def iter_linearized_fasta(lines):
    """Yield the lines of a FASTA with each sequence joined onto one line.

    ``lines`` is any iterable of text lines (for example an open file).
    Every line starting with ">" is yielded unchanged (whitespace-stripped)
    and is followed by exactly one line holding the concatenation of all
    sequence lines that belong to it.  The yielded strings carry no trailing
    newline.

    Differences from the previous inline loop, all deliberate fixes:
    * the sequence of the LAST record is emitted (it used to be dropped
      because a sequence was only flushed when the next header appeared);
    * no empty line is emitted before the first header;
    * blank lines are skipped instead of raising IndexError.
    """
    parts = []            # sequence fragments of the record being read
    seen_header = False   # True once at least one ">" line was read
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith(">"):
            # Flush the previous record (or stray sequence text that came
            # before any header) before starting the new one.
            if seen_header or parts:
                yield "".join(parts)
            parts = []
            seen_header = True
            yield line
        else:
            parts.append(line)
    # Flush the final record; a header without sequence yields "".
    if seen_header or parts:
        yield "".join(parts)


def linearize_fasta_file(in_path, out_path):
    """Read the FASTA at ``in_path`` and write its linearized form to ``out_path``.

    Both files are opened with context managers so the output is flushed
    and closed even if reading fails part-way through.
    """
    with open(in_path, 'r') as src, open(out_path, 'w') as dst:
        for line in iter_linearized_fasta(src):
            dst.write(line + "\n")


if __name__ == "__main__":
    in1 = sys.argv[1]  # input FASTA path
    ou1 = sys.argv[2]  # output FASTA path
    linearize_fasta_file(in1, ou1)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
in1=sys.argv[1] # input table: tab-separated, first line skipped; column 1 = FASTA file path, column 2 = position
ou1=sys.argv[2] # output file
from Bio import SeqIO
from Bio.Seq import Seq
FLANK = 150                 # window size used on either side of a position
HPV18_MARKER = "hpv18"      # substring of the FASTA path selecting the special case
HPV18_WRAP_POSITION = "7857"  # position whose 3' flank is taken from the sequence start


def group_positions(lines):
    """Group position strings by FASTA path.

    ``lines`` is an iterable of tab-separated rows; the first row is
    skipped as a header.  Column 1 is the FASTA path, column 2 the
    position (kept as a string).  Blank rows are ignored, which avoids the
    IndexError a trailing empty line used to cause.  Returns a dict mapping
    each path to its positions in input order.
    """
    rows = iter(lines)
    next(rows, None)  # skip the header row
    grouped = {}
    for row in rows:
        fields = row.strip().split('\t')
        if fields == ['']:
            continue
        grouped.setdefault(str(fields[0]), []).append(str(fields[1]))
    return grouped


def flank_sequences(iseq, posi, wraps_origin=False, flank=FLANK):
    """Return the ``(five_prime, three_prime)`` flanks around ``posi``.

    ``iseq`` is the sequence string and ``posi`` a position convertible
    with ``int()``.  The 5' flank is ``iseq[posi-1-flank:posi]``.  With
    ``wraps_origin`` the 3' flank is the first ``flank`` characters of the
    sequence; otherwise it is ``iseq[posi:posi-1+flank]``.

    The 5' start is clamped at 0: a negative start would be interpreted by
    Python as an offset from the END of the string and silently return an
    empty or unrelated slice for positions near the sequence start.
    """
    pos = int(posi)
    if wraps_origin:
        return str(iseq[pos - 1 - flank:pos]), str(iseq[0:flank])
    left = max(0, pos - 1 - flank)
    # NOTE(review): this end index gives a 3' flank of flank-1 characters,
    # while the wrap-around case returns flank characters.  Kept as in the
    # original because the intended window is not clear from the code.
    right = pos - 1 + flank
    return str(iseq[left:pos]), str(iseq[pos:right])


def format_flank_records(filename, posi, five_prime, three_prime):
    """Format both flanks as two FASTA records.

    Headers are ``>{filename}_{posi}_5p`` and ``>{filename}_{posi}_3p``.
    The 3' header always uses an ASCII underscore; one branch of the
    original wrote a non-ASCII dash there, producing inconsistent names.
    """
    label = str(filename) + "_" + str(int(posi))
    return (">" + label + "_5p\n" + str(five_prime) + "\n"
            ">" + label + "_3p\n" + str(three_prime) + "\n")


def extract_flanks(table_path, out_path):
    """Write flank records for every (FASTA path, position) row of the table.

    For each FASTA path in the table, every record of that file is parsed
    and one 5p/3p pair is written per listed position.  The stdout trace
    of the original script (the grouped table, each position and a branch
    tag "1"/"2"/"3") is preserved.
    """
    with open(table_path, 'r') as table:
        positions_by_fasta = group_positions(table)
    print(str(positions_by_fasta))
    # Context manager guarantees the output is flushed and closed.
    with open(out_path, 'w') as out:
        for filename, positions in positions_by_fasta.items():
            is_hpv18 = HPV18_MARKER in filename
            for record in SeqIO.parse(filename, "fasta"):
                iseq = str(record.seq)
                for posi in positions:
                    wraps = is_hpv18 and str(posi) == HPV18_WRAP_POSITION
                    five_prime, three_prime = flank_sequences(
                        iseq, posi, wraps_origin=wraps)
                    print(str(posi))
                    if wraps:
                        print("1")
                        print(str(five_prime))
                        print(str(three_prime))
                    elif is_hpv18:
                        print("2")
                    else:
                        print("3")
                    out.write(format_flank_records(
                        filename, posi, five_prime, three_prime))


if __name__ == "__main__":
    extract_flanks(in1, ou1)
网友评论