·

作者: 你的仙女本仙 | 来源:发表于2020-04-23 17:45 被阅读0次
    基于质心的算法 代表 K-means
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.datasets import load_iris
    from sklearn.manifold import TSNE
    
    df=pd.read_excel("lip_cluster.xlsx")
    df1=df[['price','number_pay','amount','adress']]#选中数据框某几列
    print(np.isnan(df1).any())
    df1nona=df1[df1['amount'].notna()]#空值删除整行
    print(np.isnan(df1nona).any())#检查是否含有空行
    
    scale=MinMaxScaler().fit(df1nona)
    df1_scale=scale.transform(df1nona)
    
    plt.scatter(df1_scale[:, 2], df1_scale[:, 3],##原始数据散点图
                c = "red", marker='o', label='see')
    plt.xlabel('lip number_pay')
    plt.ylabel('lip amount')
    plt.legend(loc=2)
    plt.show()
    
    kmeans=KMeans(n_clusters=3).fit(df1_scale)#构造聚类器,estimator初始化Kmeans聚类;estimator.fit聚类内容拟合;
    label_pred = kmeans.labels_ #获取聚类标签
    centroids=kmeans.cluster_centers_#获取聚类中心
    inertia = kmeans.inertia_ # 获取聚类准则的总和
    ssa=kmeans.inertia_#组内平方和
    y_kmeans2=kmeans.predict(df1_scale)
    
    plt.scatter(df1_scale[:, 2], df1_scale[:, 3],
                c=y_kmeans2, marker='p', cmap='rainbow', linewidths=4)
    plt.show()
    
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    ax1 = plt.axes(projection='3d')
    ax1.scatter3D(df1nona.values[:, 0], df1nona.values[:, 1],
                  df1nona.values[:, 3], cmap='Blues')  #绘制散点图
    plt.show()
    
    
    
    
    import numpy as np
    class HiddenMarkov:
        def forward(self, Q, V, A, B, O, PI):  # 使用前向算法
            N = len(Q)  #可能存在的状态数量
            M = len(O)  # 观测序列的大小
            alphas = np.zeros((N, M))  # alpha值
            T = M  # 有几个时刻,有几个观测序列,就有几个时刻
            for t in range(T):  # 遍历每一时刻,算出alpha值
                indexOfO = V.index(O[t])  # 找出序列对应的索引
                for i in range(N):
                    if t == 0:  # 计算初值
                        alphas[i][t] = PI[t][i] * B[i][indexOfO]  # P176(10.15)
                        print(
                            'alpha1(%d)=p%db%db(o1)=%f' % (i, i, i, alphas[i][t]))
                    else:
                        alphas[i][t] = np.dot(
                            [alpha[t - 1] for alpha in alphas],
                            [a[i] for a in A]) * B[i][indexOfO]  # 对应P176(10.16)
                        print('alpha%d(%d)=[sigma alpha%d(i)ai%d]b%d(o%d)=%f' %
                              (t, i, t - 1, i, i, t, alphas[i][t]))
                        print(alphas)
            P = np.sum([alpha[M - 1] for alpha in alphas])  # P176(10.17)
    
        def backward(self, Q, V, A, B, O, PI):  # 后向算法
            N = len(Q)  # 可能存在的状态数量
            M = len(O)  # 观测序列的大小
            betas = np.ones((N, M))  # beta
            for i in range(N):
                print('beta%d(%d)=1' % (M, i))
            for t in range(M - 2, -1, -1):
                indexOfO = V.index(O[t + 1])  # 找出序列对应的索引
                for i in range(N):
                    betas[i][t] = np.dot(
                        np.multiply(A[i], [b[indexOfO] for b in B]),
                        [beta[t + 1] for beta in betas])
                    realT = t + 1
                    realI = i + 1
                    print(
                        'beta%d(%d)=[sigma a%djbj(o%d)]beta%d(j)=(' %
                        (realT, realI, realI, realT + 1, realT + 1),
                        end='')
                    for j in range(N):
                        print(
                            "%.2f*%.2f*%.2f+" % (A[i][j], B[j][indexOfO],
                                                 betas[j][t + 1]),
                            end='')
                    print("0)=%.3f" % betas[i][t])
            # print(betas)
            indexOfO = V.index(O[0])
            P = np.dot(
                np.multiply(PI, [b[indexOfO] for b in B]),
                [beta[0] for beta in betas])
            print("P(O|lambda)=", end="")
            for i in range(N):
                print(
                    "%.1f*%.1f*%.5f+" % (PI[0][i], B[i][indexOfO], betas[i][0]),
                    end="")
            print("0=%f" % P)
    
        def viterbi(self, Q, V, A, B, O, PI):
            N = len(Q)  # 可能存在的状态数量
            M = len(O)  # 观测序列的大小
            deltas = np.zeros((N, M))
            psis = np.zeros((N, M))
            I = np.zeros((1, M))
            for t in range(M):
                realT = t + 1
                indexOfO = V.index(O[t])  # 找出序列对应的索引
                for i in range(N):
                    realI = i + 1
                    if t == 0:
                        deltas[i][t] = PI[0][i] * B[i][indexOfO]
                        psis[i][t] = 0
                        print('delta1(%d)=pi%d * b%d(o1)=%.2f * %.2f=%.2f' %
                              (realI, realI, realI, PI[0][i], B[i][indexOfO],
                               deltas[i][t]))
                        print('psis1(%d)=0' % (realI))
                    else:
                        deltas[i][t] = np.max(
                            np.multiply([delta[t - 1] for delta in deltas],
                                        [a[i] for a in A])) * B[i][indexOfO]
                        print(
                            'delta%d(%d)=max[delta%d(j)aj%d]b%d(o%d)=%.2f*%.2f=%.5f'
                            % (realT, realI, realT - 1, realI, realI, realT,
                               np.max(
                                   np.multiply([delta[t - 1] for delta in deltas],
                                               [a[i] for a in A])), B[i][indexOfO],
                               deltas[i][t]))
                        psis[i][t] = np.argmax(
                            np.multiply(
                                [delta[t - 1] for delta in deltas],
                                [a[i]
                                 for a in A])) + 1  # 由于其返回的是索引,因此应+1才能和正常的下标值相符合。
                        print('psis%d(%d)=argmax[delta%d(j)aj%d]=%d' %
                              (realT, realI, realT - 1, realI, psis[i][t]))
            print(deltas)
            print(psis)
            I[0][M - 1] = np.argmax([delta[M - 1] for delta in deltas
                                     ]) + 1  # 由于其返回的是索引,因此应+1才能和正常的下标值相符合。
            print('i%d=argmax[deltaT(i)]=%d' % (M, I[0][M - 1]))
            for t in range(M - 2, -1, -1):
                I[0][t] = psis[int(I[0][t + 1]) - 1][t + 1]
                print('i%d=psis%d(i%d)=%d' % (t + 1, t + 2, t + 2, I[0][t]))
            print("状态序列I:", I)
    
    例子
    Q = [1, 2, 3]
    V = ['红', '白']
    A = [[0.5, 0.2, 0.3], [0.3, 0.5, 0.2], [0.2, 0.3, 0.5]]
    B = [[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]]
    # O = ['红', '白', '红', '红', '白', '红', '白', '白']
    O = ['红', '白', '红', '白']    #习题10.1的例子
    PI = [[0.2, 0.4, 0.4]]
    
    HMM = HiddenMarkov()
    HMM.viterbi(Q, V, A, B, O, PI)
    
    delta1(1)=pi1 * b1(o1)=0.20 * 0.50=0.10
    psis1(1)=0
    delta1(2)=pi2 * b2(o1)=0.40 * 0.40=0.16
    psis1(2)=0
    delta1(3)=pi3 * b3(o1)=0.40 * 0.70=0.28
    psis1(3)=0
    delta2(1)=max[delta1(j)aj1]b1(o2)=0.06*0.50=0.02800
    psis2(1)=argmax[delta1(j)aj1]=3
    delta2(2)=max[delta1(j)aj2]b2(o2)=0.08*0.60=0.05040
    psis2(2)=argmax[delta1(j)aj2]=3
    delta2(3)=max[delta1(j)aj3]b3(o2)=0.14*0.30=0.04200
    psis2(3)=argmax[delta1(j)aj3]=3
    delta3(1)=max[delta2(j)aj1]b1(o3)=0.02*0.50=0.00756
    psis3(1)=argmax[delta2(j)aj1]=2
    delta3(2)=max[delta2(j)aj2]b2(o3)=0.03*0.40=0.01008
    psis3(2)=argmax[delta2(j)aj2]=2
    delta3(3)=max[delta2(j)aj3]b3(o3)=0.02*0.70=0.01470
    psis3(3)=argmax[delta2(j)aj3]=3
    delta4(1)=max[delta3(j)aj1]b1(o4)=0.00*0.50=0.00189
    psis4(1)=argmax[delta3(j)aj1]=1
    delta4(2)=max[delta3(j)aj2]b2(o4)=0.01*0.60=0.00302
    psis4(2)=argmax[delta3(j)aj2]=2
    delta4(3)=max[delta3(j)aj3]b3(o4)=0.01*0.30=0.00220
    psis4(3)=argmax[delta3(j)aj3]=3
    [[0.1      0.028    0.00756  0.00189 ]
     [0.16     0.0504   0.01008  0.003024]
     [0.28     0.042    0.0147   0.002205]]
    [[0. 3. 2. 1.]
     [0. 3. 2. 2.]
     [0. 3. 3. 3.]]
    i4=argmax[deltaT(i)]=2
    i3=psis4(i4)=2
    i2=psis3(i3)=2
    i1=psis2(i2)=3
    状态序列I: [[3. 2. 2. 2.]]
    

    相关文章

      网友评论

          本文标题:·

          本文链接:https://www.haomeiwen.com/subject/mvoaihtx.html