(接上篇)
前言:现有长度相等,但碱基存在一定差异的两条DNA序列,求它们之间的最长相同子序列,并输出差异碱基的位置。
代码如下:
import os,sys
file = sys.argv[1]
outdir = sys.argv[2]
os.system('mkdir %s'%(outdir))
def HitsParser(f):
load = open(f,'r')
Hits = [line for line in load.readlines()]
All_align = []
for i in range(1,len(Hits)):
align = {}
align['query.seq'] = Hits[i].strip().split('\t')[12]
align['subject.seq'] = Hits[i].strip().split('\t')[13]
All_align.append(align)
return All_align
def DiffPos(f,n):
str1 = HitsParser(f)[n]['query.seq']
str2 = HitsParser(f)[n]['subject.seq']
length = len(str1)
All_pos = []
for i in range(length):
if str1[i] != str2[i]:
pos = [str(i),str1[i],str2[i]]
All_pos.append(pos)
return All_pos
Header = 'Position\tQuery.Base\tSubject.base\n'
All_hits = HitsParser(file)
Hits_num = len(All_hits)
for j in range(Hits_num):
L = DiffPos(file,j)
S = Header+'\n'.join(['\t'.join(i) for i in L])+'\n'
with open('%s/DiffPos_%s.txt'%(outdir,str(j)),'w') as g:
g.write(S)
g.close()
网友评论