全局比对-python

作者: caokai001 | 来源:发表于2018-11-30 22:39 被阅读4次

    sequence1="AGWGAHEA"
    sequence2 = "PAWHEAEAG"
    gap= -8
    score_matrix: Blosum50 (substitution matrix)


    import numpy as np
    import pandas as pd
    import os
    # Inputs for the global alignment: the two sequences to be aligned.
    sequence1="AGWGAHEA"
    sequence2 = "PAWHEAEAG"
    # Aligned output strings; the traceback appends to them back-to-front and
    # they are reversed when printed.
    s1=''
    s2=''
    # Linear gap penalty: the fill loop adds it once per gap position.
    gap=-8
    # NOTE(review): hard-coded, machine-specific working directory; the
    # relative path passed to read_excel below is resolved against it.
    os.chdir(r"C:/Users/16926/Desktop/")
    # Substitution scores loaded from a spreadsheet.
    # NOTE(review): presumably rows and columns are both labelled with residue
    # letters (no index_col is passed here) -- confirm against sample.xlsx.
    score_matrix=pd.read_excel("sample.xlsx")
    # DP table with one row per prefix of sequence2 and one column per prefix
    # of sequence1. np.empty leaves the cells uninitialised; the fill loop
    # writes every cell before any of them is read.
    best_matrix=np.empty(shape= (len(sequence2)+1,len(sequence1)+1),dtype = int)
    
    def get_match_score(s1,s2):
        """Return the substitution score for aligning residue s1 with residue s2.

        The score is read from the module-level ``score_matrix`` DataFrame as
        ``score_matrix[s1][s2]`` (column ``s1``, row ``s2``).

        Substitution scores are symmetric, and the stored table holds only one
        triangle (the other half is NaN). When the direct lookup is NaN, the
        mirrored entry ``score_matrix[s2][s1]`` is returned instead, so the
        result does not depend on argument order. Without this fallback a NaN
        would reach the caller's ``max(...)``, where it is silently ignored
        and the substitution move is never considered.

        Parameters:
            s1: residue letter used as the column label.
            s2: residue letter used as the row label.

        Returns:
            The score stored for the pair (NaN only if both orientations are
            missing from the table).

        Raises:
            KeyError: if either letter is not a label in ``score_matrix``.
        """
        score=score_matrix[s1][s2]
        if pd.isna(score):
            # Only one triangle of the symmetric table is populated.
            score=score_matrix[s2][s1]
        return score
    
    # Fill the DP table: borders first, then the interior row by row.
    n_rows = len(sequence2) + 1
    n_cols = len(sequence1) + 1

    # Row 0 / column 0: a prefix aligned against nothing costs one gap
    # penalty per residue.
    for col in range(n_cols):
        best_matrix[0][col] = gap * col
    for row in range(1, n_rows):
        best_matrix[row][0] = gap * row

    for row in range(1, n_rows):
        for col in range(1, n_cols):
            match = get_match_score(sequence2[row - 1], sequence1[col - 1])
            # Candidate order (up, left, diagonal) is kept as-is because it
            # decides how max() treats ties and non-comparable values.
            candidates = (
                best_matrix[row - 1][col] + gap,        # gap in sequence1
                best_matrix[row][col - 1] + gap,        # gap in sequence2
                best_matrix[row - 1][col - 1] + match,  # align the two residues
            )
            best_matrix[row][col] = max(candidates)
    print(best_matrix)
    # Traceback: walk from the bottom-right cell back to the origin, rebuilding
    # the alignment in reverse. Moves are tried in the order diagonal
    # (align two residues), up (gap in sequence1), left (gap in sequence2).
    i,j = len(sequence2),len(sequence1)
    while i>0 or j>0:
        # A diagonal move is only possible away from the borders, so the
        # substitution score is looked up only there. Looking it up
        # unconditionally would index sequence[-1] on a border (a meaningless
        # wrap-around) and raise IndexError when one sequence is empty.
        came_from_diagonal = False
        if i>0 and j>0:
            match = get_match_score(sequence2[i-1],sequence1[j-1])
            came_from_diagonal = best_matrix[i][j] == best_matrix[i-1][j-1]+match
        if came_from_diagonal:
            s1 += sequence1[j-1]
            s2 += sequence2[i-1]
            i-=1;j-=1
        elif i>0 and best_matrix[i,j] == best_matrix[i-1,j]+gap:
            # Came from the cell above: sequence2 residue against a gap.
            s1+='-'
            s2+=sequence2[i-1]
            i-=1
        else:
            # Came from the cell to the left: sequence1 residue against a gap.
            s1+=sequence1[j-1]
            s2+='-'
            j-=1
    # The strings were built end-to-start; reverse them for display.
    print(s1[::-1]+'\n'+s2[::-1])
    
    [[  0  -8 -16 -24 -32 -40 -48 -56 -64]
     [ -8  -1  -9 -17 -25 -33 -41 -49 -57]
     [-16  -3 -11 -19 -27 -20 -28 -36 -44]
     [-24 -11  -6   4  -4 -12 -20 -28 -36]
     [-32 -19 -13  -4   2  -6  -2 -10 -18]
     [-40 -27 -21 -12  -6   1  -7   4  -4]
     [-48 -35 -29 -20 -14  -1  -9  -4   9]
     [-56 -43 -37 -28 -22  -9 -17  -3   1]
     [-64 -51 -45 -36 -30 -17 -25 -11   2]
     [-72 -59 -43 -44 -28 -25 -33 -19  -6]]
    -AGWG-AHEA-
    PA-WHEA-EAG
    
    score_matrix
        A   E   G   H   P   W
    A   5.0 -1.0    0.0 -2.0    -1.0    -3
    E   NaN 6.0 -3.0    0.0 -1.0    -3
    G   NaN NaN 8.0 -2.0    -2.0    -3
    H   NaN NaN NaN 10.0    -2.0    -3
    P   NaN NaN NaN NaN 10.0    -4
    W   NaN NaN NaN NaN NaN 15
    

    相关文章

      网友评论

        本文标题:全局比对-python

        本文链接:https://www.haomeiwen.com/subject/mdikcqtx.html