# ----ROSALIND_14: ----
# Finding a Shared Motif
# Load the puzzle input: one list entry per line of the file, with the
# newline characters removed from both ends of each line.
with open("14_Finding a Shared Motif.txt") as f:
    DNA_file = [line.strip("\n") for line in f]
# Bare expression: has no effect in a script, but echoes the parsed lines
# when evaluated in an interactive session.
DNA_file
# ![](https://img.haomeiwen.com/i12544845/260c9eb42dcc83b7.png)
# image.png
def max_lcs(file):
    """Print and return the longest substring shared by all FASTA records.

    Args:
        file: Iterable of text lines in FASTA layout. A line is treated as a
            record header when it starts with ">" or contains "Rosalind";
            every other non-blank line is sequence data belonging to the
            most recent header. Records may span any number of lines.

    Returns:
        One longest substring that occurs in every record's sequence. When
        several substrings tie for the maximum length, the one that starts
        earliest in the shortest sequence is returned. The result is the
        empty string when there are no records or nothing is shared. The
        same value is also printed to standard output.
    """
    sequences = _read_fasta_sequences(file)
    motif = ""
    if sequences:
        # Any shared substring must occur in the shortest sequence, so it is
        # the cheapest source of candidates.
        by_length = sorted(sequences, key=len)
        reference, others = by_length[0], by_length[1:]

        # "A shared substring of length L exists" is monotone in L (every
        # prefix of a shared substring is shared too), so binary-search L.
        # Invariant: `motif` is a shared substring of length `low`.
        low, high = 0, len(reference)
        while low < high:
            middle = (low + high + 1) // 2
            found = _shared_substring_of_length(reference, others, middle)
            if found is None:
                high = middle - 1
            else:
                motif, low = found, middle

    print(motif)
    return motif


def _read_fasta_sequences(lines):
    """Return the concatenated sequence of each FASTA record, in input order."""
    sequences = []
    chunks = None  # pieces of the record being read; None before any header
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">") or "Rosalind" in line:
            if chunks is not None:
                sequences.append("".join(chunks))
            chunks = []
        elif chunks is not None:
            # Sequence data; lines that precede the first header are ignored.
            chunks.append(line)
    if chunks is not None:
        sequences.append("".join(chunks))
    return sequences


def _shared_substring_of_length(reference, others, length):
    """Return the first `length`-long substring of `reference` found in all `others`, or None."""
    tried = set()
    for start in range(len(reference) - length + 1):
        candidate = reference[start:start + length]
        if candidate in tried:
            continue  # repeated substring, already rejected
        tried.add(candidate)
        if all(candidate in other for other in others):
            return candidate
    return None
# ![](https://img.haomeiwen.com/i12544845/0e1c011761d34f6d.png)
# image.png
# 网友评论 (reader comments)