使用python的模块edlib,可实现要比较的两种变异的全局比对(mode=“HW”),
通过输入要比较的两种标记的ref,alt;分别获得对应的全局比对后的格式,然后再做比较。
比如:
![](https://img.haomeiwen.com/i14201035/8491872ec426a46e.png)
import sys
import edlib
def diff(s, t):
ref, alt = [], []
flag = False
if len(s) < len(t): flag, s, t = True, t, s
t = t + " "*(len(s)-len(t))
aln = edlib.align(s, t, mode="HW", task = "path")
match = edlib.getNiceAlignment(aln, s, t)['matched_aligned']
# print(match)
path = len(match)
for i in range(path):
if match[i] == "|": continue
elif match[i] == "." or match[i] == "-":
ref.append(s[i])
alt.append(t[i])
# print(edlib.getNiceAlignment(aln, s, t))
if flag:
a="".join(alt), "".join(ref)
else:
a="".join(ref), "".join(alt)
return a
with open('diff_clinv_dbsnp.xls', 'r') as multi_file:
lines = multi_file.readlines()
with open('output.xls', 'w') as single_file:
for line in lines:
if line.startswith('V1'):
single_file.write(line)
continue
columns = line.strip().split('\t')
custom_cols = columns[:3]
chromosome, position, ref_base, alt_base = columns[3:7]
other_cols = columns[7:]
ref1 = other_cols[0]
alt1 = other_cols[1]
glo = diff(ref_base,alt_base)
clinvar = diff(ref1,alt1)
if glo == clinvar:
same_status = 'Y'
else:
same_status = 'N'
single_file.write('\t'.join(custom_cols + [chromosome, str(int(position)), str(glo), str(clinvar), ref_base, alt_base] + other_cols + [same_status]) + '\n')
网友评论