美文网首页小资料工程师乐园
python: 朴素贝叶斯分类器的简单实现

python: 朴素贝叶斯分类器的简单实现

作者: luffynoonepiece | 来源:发表于2020-07-03 10:54 被阅读0次

    以下是李航《统计学习方法》第 50 页朴素贝叶斯例题的 Python 实现,分别给出极大似然估计和贝叶斯估计(拉普拉斯平滑)两种方法的计算结果。

    import pandas as pd
    import numpy as np
    from collections import Counter
    #分贝叶斯估计与极大似然估计两种计算结果
    class MLE():
        """Naive Bayes classifier for categorical data using maximum likelihood.

        All probabilities are plain relative frequencies (no smoothing), so a
        feature value that never occurs together with a class contributes a
        probability of 0 for that class.
        """

        def Bayesian_estimation(self, np_arr):
            """Estimate class priors and per-class conditional probabilities.

            Despite its name this method computes maximum likelihood estimates;
            the name is kept so existing callers keep working.

            Args:
                np_arr: 2-D array whose last row holds the class labels and
                    whose other rows each hold one feature's values, one
                    column per sample.

            Returns:
                A tuple ``(y_dict, yx_CNT)``. ``y_dict`` maps each class label
                to its prior probability. ``yx_CNT`` is a list of
                ``[label, class_count, feature_index, probs]`` entries where
                ``probs`` maps every feature value observed with that class to
                its conditional probability.
            """
            labels = list(np_arr[-1])
            sample_count = len(labels)
            class_counts = Counter(labels)

            # Iterate over a set of the labels, as the original did, so the
            # key order of y_dict (and therefore printed output) is unchanged.
            y_dict = {
                label: class_counts[label] / sample_count
                for label in set(labels)
            }

            feature_count = np_arr.shape[0] - 1
            yx_CNT = []
            for label in y_dict:
                class_count = class_counts[label]
                for j in range(feature_count):
                    # Count feature j's values among samples of this class.
                    value_counts = Counter(
                        value
                        for value, y in zip(np_arr[j], labels)
                        if y == label
                    )
                    probs = {
                        value: count / class_count
                        for value, count in value_counts.items()
                    }
                    yx_CNT.append([label, class_count, j, probs])
            return y_dict, yx_CNT

        def forecast(self, features, path):
            """Predict the class of ``features`` from the CSV data at ``path``.

            The CSV is read with ``pandas.read_csv``; its last column is used
            as the class label and the preceding columns as features. The
            data, the feature vector and the per-class scores are printed.

            Args:
                features: Feature values in column order.
                path: Anything accepted by ``pandas.read_csv``.

            Returns:
                A string naming the class with the highest score.

            Raises:
                IndexError: If ``features`` has more entries than the data has
                    feature columns.
            """
            data = pd.read_csv(path)
            print(data)
            np_arr = np.array(data.values)
            y_dict, yx_CNT = self.Bayesian_estimation(np_arr.T)
            print('1. 极大似然估计法')
            print('用于预测的特征值为: {}'.format(features))

            # (label, feature index) -> {feature value: probability}
            cond_probs = {(label, j): probs for label, _, j, probs in yx_CNT}

            res1 = {}
            for key, prior in y_dict.items():
                score = prior
                for i, value in enumerate(features):
                    probs = cond_probs.get((key, i))
                    if probs is None:
                        raise IndexError(
                            'feature index {} is out of range'.format(i)
                        )
                    # A value never observed with this class has a maximum
                    # likelihood estimate of 0 rather than being an error.
                    score = score * probs.get(value, 0)
                res1[key] = score
            print(res1)

            return '预测结果为 {}'.format(max(res1, key=res1.get))
    
    class BE():
        """Naive Bayes classifier for categorical data using Bayesian estimation.

        Every count is smoothed by ``lamda`` (Laplace smoothing when
        ``lamda == 1``), so a feature value that never occurs together with a
        class still receives a non-zero probability for that class.
        """

        def Bayesian_estimation(self, np_arr, lamda=1):
            """Estimate smoothed class priors and conditional probabilities.

            With N samples, K classes and S_j distinct values of feature j::

                P(Y=c)          = (count(Y=c) + lamda) / (N + K * lamda)
                P(X_j=a | Y=c)  = (count(X_j=a, Y=c) + lamda)
                                  / (count(Y=c) + S_j * lamda)

            Args:
                np_arr: 2-D array whose last row holds the class labels and
                    whose other rows each hold one feature's values, one
                    column per sample.
                lamda: Non-negative smoothing constant; defaults to 1.

            Returns:
                A tuple ``(y_dict, yx_CNT)``. ``y_dict`` maps each class label
                to its smoothed prior. ``yx_CNT`` is a list of
                ``[label, class_count + lamda, feature_index, probs]`` entries
                where ``probs`` maps every value of that feature seen anywhere
                in the data to its smoothed conditional probability. The
                second element is retained for backward compatibility only;
                it is not the denominator used for ``probs``.

            Raises:
                ValueError: If ``lamda`` is negative.
            """
            if lamda < 0:
                raise ValueError('lamda must be non-negative')

            labels = list(np_arr[-1])
            sample_count = len(labels)
            class_counts = Counter(labels)
            prior_denominator = sample_count + len(class_counts) * lamda

            # Iterate over a set of the labels, as the original did, so the
            # key order of y_dict (and therefore printed output) is unchanged.
            y_dict = {
                label: (class_counts[label] + lamda) / prior_denominator
                for label in set(labels)
            }

            feature_count = np_arr.shape[0] - 1
            yx_CNT = []
            for label in y_dict:
                class_count = class_counts[label]
                for j in range(feature_count):
                    column = list(np_arr[j])
                    # Distinct values of feature j over the whole data set
                    # (S_j), in order of first appearance.
                    distinct_values = list(dict.fromkeys(column))
                    value_counts = Counter(
                        value
                        for value, y in zip(column, labels)
                        if y == label
                    )
                    # The denominator depends on S_j, not on the class count,
                    # so that the probabilities of feature j sum to 1.
                    denominator = class_count + len(distinct_values) * lamda
                    # Counter returns 0 for values not seen with this class,
                    # so those still get the smoothed share lamda/denominator.
                    probs = {
                        value: (value_counts[value] + lamda) / denominator
                        for value in distinct_values
                    }
                    yx_CNT.append([label, class_count + lamda, j, probs])
            return y_dict, yx_CNT

        def forecast(self, features, path, lamda=1):
            """Predict the class of ``features`` from the CSV data at ``path``.

            The CSV is read with ``pandas.read_csv``; its last column is used
            as the class label and the preceding columns as features. The
            feature vector and the per-class scores are printed.

            Args:
                features: Feature values in column order.
                path: Anything accepted by ``pandas.read_csv``.
                lamda: Non-negative smoothing constant; defaults to 1.

            Returns:
                A string naming the class with the highest score.

            Raises:
                IndexError: If ``features`` has more entries than the data has
                    feature columns.
                KeyError: If a feature value does not occur anywhere in the
                    corresponding column of the data.
            """
            data = pd.read_csv(path)
            np_arr = np.array(data.values)
            y_dict, yx_CNT = self.Bayesian_estimation(np_arr.T, lamda)
            print('2. 贝叶斯估计法')
            print('用于预测的特征值为: {}'.format(features))

            # (label, feature index) -> {feature value: probability}
            cond_probs = {(label, j): probs for label, _, j, probs in yx_CNT}

            res1 = {}
            for key, prior in y_dict.items():
                score = prior
                for i, value in enumerate(features):
                    probs = cond_probs.get((key, i))
                    if probs is None:
                        raise IndexError(
                            'feature index {} is out of range'.format(i)
                        )
                    score = score * probs[value]
                res1[key] = score
            print(res1)

            return '预测结果为 {}'.format(max(res1, key=res1.get))
    
    
    if __name__ == '__main__':
        # Sample data set and query point shared by both estimators.
        path = 'E:\\leetcode\\朴素贝叶斯分类器\\bayes.csv'
        features = [2,'S']

        # Maximum likelihood estimate first.
        mle_result = MLE().forecast(features, path)
        print(mle_result)

        # Blank separator, then the smoothed (Bayesian) estimate.
        print('\n')
        be_result = BE().forecast(features, path)
        print(be_result)
    

    运算结果为:

       x1 x2  Y
    0    1  S -1
    1    1  M -1
    2    1  M  1
    3    1  S  1
    4    1  S -1
    5    2  S -1
    6    2  M -1
    7    2  M  1
    8    2  L  1
    9    2  L  1
    10   3  L  1
    11   3  M  1
    12   3  M  1
    13   3  L  1
    14   3  L -1
    1. 极大似然估计法
    用于预测的特征值为: [2, 'S']
    {1: 0.02222222222222222, -1: 0.06666666666666667}
    预测结果为 -1
    
    
    2. 贝叶斯估计法
    用于预测的特征值为: [2, 'S']
    {1: 0.0326797385620915, -1: 0.06100217864923746}
    预测结果为 -1
    
    蟹蟹

    相关文章

      网友评论

        本文标题:python: 朴素贝叶斯分类器的简单实现

        本文链接:https://www.haomeiwen.com/subject/pnojqktx.html