# ----ROSALIND_10: ----
# Consensus and Profile
# Load the FASTA input once at import time; every entry of DNA_strings is one
# line of the file with its newline characters stripped.
with open("10_Consensus and Profile.txt") as f:
    DNA_strings = [line.strip("\n") for line in f]
def _read_fasta_sequences(lines):
    """Return one joined sequence string per FASTA record found in *lines*."""
    records = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        # A header opens a new record. The "Rosalind" test is kept so that
        # header lines written without the leading ">" are still recognised.
        if line.startswith(">") or "Rosalind" in line:
            records.append([])
        elif records:
            # A record may span several lines; collect them all.
            records[-1].append(line)
        # Sequence data appearing before the first header is skipped.
    return ["".join(parts) for parts in records]


def myConPro(sample_File):
    """Print the profile matrix of FASTA DNA records and return their consensus.

    Args:
        sample_File: The FASTA input as a list or tuple of lines. Any other
            value is ignored and the module-level ``DNA_strings`` is used
            instead, which is what this function always did before.

    Returns:
        The consensus string: for each column, the base with the highest
        count. Ties are resolved in the order A, T, C, G. An input without
        any record yields ``""``.

    Raises:
        ValueError: If the DNA strings do not all have the same length.

    Side effects:
        Prints one line per base (A, T, C, G, in that order) followed by the
        list of its per-column counts.
    """
    if isinstance(sample_File, (list, tuple)):
        lines = sample_File
    else:
        # Backward-compatible fallback to the lines loaded at import time.
        lines = DNA_strings

    sequences = _read_fasta_sequences(lines)
    if len({len(seq) for seq in sequences}) > 1:
        raise ValueError("all DNA strings must have the same length")

    # zip(*rows) transposes the matrix: each tuple holds one column, i.e. the
    # bases found at the same position in every sequence.
    columns = list(zip(*sequences))

    bases = "ATCG"
    profile = {base: [column.count(base) for column in columns] for base in bases}
    for base, counts in profile.items():
        print(base, counts)

    # max() returns the first maximal item, so ties follow the order of `bases`.
    return "".join(max(bases, key=column.count) for column in columns)
# Image from the original post: https://img.haomeiwen.com/i12544845/940e67b6959c29f0.png (image.png)
# [Reader comments section of the original web page]